//////////////////////////////////////////////////////////////////////////////
//
//                      INTEL CONFIDENTIAL
//       Copyright (C) Intel Corporation All Rights Reserved.
//
// The source code contained or described herein and all documents related to
// the source code ("Material") are owned by Intel Corporation or its
// suppliers. Title to the Material remains with Intel Corporation, its
// suppliers, or licensors. The Material contains trade secrets and
// proprietary and confidential information of Intel Corporation, its
// suppliers, and licensors, and is protected by worldwide copyright and trade
// secret laws and treaty provisions. No part of the Material may be used,
// copied, reproduced, modified, published, uploaded, posted, transmitted,
// distributed, or disclosed in any way without Intel's prior express written
// permission.
//
// No license under any patent, copyright, trade secret or other intellectual
// property right is granted to or conferred upon you by disclosure or
// delivery of the Materials, either expressly, by implication, inducement,
// estoppel or otherwise. Any license under such intellectual property rights
// must be express and approved by Intel in writing.
//
// Unless otherwise agreed by Intel in writing, you may not remove or alter
// this notice or any other notice embedded in Materials by Intel or Intel's
// suppliers or licensors in any way.
//
//////////////////////////////////////////////////////////////////////////////
///  @file
///
///  @brief Implementation of the various functions for creating, destroying,
///         and manipulating BitData objects.  These are encapsulated in the
///         NBitData namespace.
///
//////////////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cstdint>
#include <cmath>
#include <cassert>
#include <iostream>
#include <vector>
#include <algorithm>
#include <cinttypes>

#include <SafeString.h>
#include <MemoryCopy.h>
#include "Internal_BitData.h"


#if defined(HOST_WINDOWS)
#include <intrin.h> // for __cpuid
// Windows implementation of the platform-neutral _cpuid() helper used by this file.
// Simply forwards to the MSVC __cpuid intrinsic.
//   cpuInfo     - OUT: receives the four register values produced by CPUID
//                 (the non-Windows variant of this helper fills them as EAX, EBX, ECX, EDX).
//   function_id - IN:  CPUID function number to query.
void _cpuid(int cpuInfo[4], int function_id)
{
    __cpuid(cpuInfo, function_id);
}

#elif defined(HOST_LINUX) || defined(HOST_DARWIN)
#include <cpuid.h> // for __get_cpuid

// Linux / macOS version of __cpuid().
//   cpuInfo     - OUT: [0] = EAX, [1] = EBX, [2] = ECX, [3] = EDX for the requested function.
//   function_id - IN:  CPUID function number to query.
// __get_cpuid() returns 0 and leaves its output arguments untouched when the
// requested function is not supported by the processor.  In that case all four
// entries are reported as zero so the caller never interprets stale array
// contents as CPUID results.
void _cpuid(int cpuInfo[4], int function_id)
{
    unsigned int eax = 0;
    unsigned int ebx = 0;
    unsigned int ecx = 0;
    unsigned int edx = 0;

    if (__get_cpuid(static_cast<unsigned int>(function_id), &eax, &ebx, &ecx, &edx) == 0)
    {
        // Unsupported function: report "no information" explicitly.
        eax = ebx = ecx = edx = 0;
    }

    cpuInfo[0] = static_cast<int>(eax);
    cpuInfo[1] = static_cast<int>(ebx);
    cpuInfo[2] = static_cast<int>(ecx);
    cpuInfo[3] = static_cast<int>(edx);
}

#endif

namespace // Anonymous
{
	// A 64-bit value that can also be accessed as two 32-bit halves.
	union split_union
	{
	    uint64_t value;
	    struct split_part { uint32_t low, high; } part;
	};
	typedef split_union split_uint64;

    // A "chunk" is the native word the library processes at a time:
    // 64 bits when HOST_X64 is defined, 32 bits otherwise.
    #ifdef HOST_X64
        typedef uint64_t UIntBase;
        typedef int64_t IntBase;
        const int32_t c_i4ChunkShift = 6;   // log2(bits per chunk)
    #else
        typedef uint32_t UIntBase;
        typedef int32_t IntBase;
        const int32_t c_i4ChunkShift = 5;   // log2(bits per chunk)
    #endif

    const UIntBase c_iOne = 1;                                  // chunk-wide constant 1 for building masks
    const uint32_t c_i4ChunkSize = (1 << c_i4ChunkShift);       // bits per chunk
    const uint32_t c_o4ChunkMask = c_i4ChunkSize - 1;           // bit offset within a chunk
    const uint32_t c_o4AlignedPtrMask = c_o4ChunkMask >> 3;     // byte offset within a chunk

    // When _TESTING_BUILD_ is defined in the Release configuration extra checks are added
    // to some of the functions to make sure that we don't access past the bounds of the BitData.
    // #define _TESTING_BUILD_
    //
    // BD_ASSERTFAIL(text, ...)
    //   Reports a failed precondition and then RETURNS FROM THE ENCLOSING FUNCTION.
    //   The optional trailing argument is the value to return (omit it in void functions).
    //   - With _TESTING_BUILD_ or _DEBUG defined: prints the message together with file,
    //     function and line to std::cout, triggers assert(), then returns.
    //   - Otherwise: returns silently.
    //   NOTE: the macro expands to several statements and is not wrapped in a single
    //   statement, so it must only be used inside a braced block.
    //
    // BD_ASSERT(condition, text, ...)
    //   Invokes BD_ASSERTFAIL(text, ...) when 'condition' is false.  Because it expands to
    //   an 'if' statement, avoid placing it directly before an 'else'.
    #if defined(_TESTING_BUILD_) || defined(_DEBUG)
        #define BD_ASSERTFAIL(text, ...) \
            std::cout << "BitData Assertion failed: " << text << ", file " << __FILE__ << ", function " << __FUNCTION__ << ", line " << __LINE__ << std::endl; \
            assert(text && 0);\
            return __VA_ARGS__;
    #else
        #define BD_ASSERTFAIL(text, ...) \
            return __VA_ARGS__;
    #endif
    #define BD_ASSERT(condition, text, ...) \
        if( !(condition) ) { \
            BD_ASSERTFAIL( text, __VA_ARGS__); \
        }

    // log2(10): number of bits needed per decimal digit.
    const double LOG_2_10 = 3.3219280948873623478703194294894; // used for figuring out how many bits are required for value represented in decimal
    // 32-entry table indexed by ((isolated_bit * 0x077cb531) >> 27); yields the position of that single set bit.
    const int32_t sc_o4FindFirstLookup[] = {0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; // used for faster FindFirst/Last lookup
    // Lower-case hexadecimal digit characters, indexed by nibble value.
    const char* cstrHexDigits = "0123456789abcdef";
    // 32-entry table mapping a masked ASCII code to its hex digit value; 0xff marks "not a hex digit".
    // NOTE(review): the layout (1..6 -> 10..15, 16..25 -> 0..9) suggests the index is (ch & 0x1f) - confirm against the user of this table.
    const unsigned char sc_o4MaskedAsciiToHexVal[] = {0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    // 16 entries, one per nibble value; each 32-bit entry packs four ASCII '0' (0x30) / '1' (0x31) bytes.
    // NOTE(review): presumably used to emit four binary digits at once when formatting as a binary string - confirm at the call site.
    const uint32_t bitdigits[] = {0x30303030, 0x31303030, 0x30313030, 0x31313030, 0x30303130, 0x31303130, 0x30313130, 0x31313130, 0x30303031, 0x31303031, 0x30313031, 0x31313031, 0x30303131, 0x31303131, 0x30313131, 0x31313131};
    // Powers of ten, 10^0 through 10^9 (every power of ten that fits in 32 bits).
    const uint32_t sc_ao4Multiplier[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 }; // used for faster decimal string conversion

    /////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Library initialization
    /////////////////////////////////////////////////////////////////////////////////////////////////////////

    // True when the host processor reports support for the POPCNT instruction.
    // Written once by the InitBitData constructor below.
    bool bPopCntSupported = false;

    // Helper whose constructor probes CPUID for POPCNT support.
    class InitBitData
    {
    public:
        InitBitData()
        {
            int registers[4] = {-1};

            // Function 0x0: EAX holds the highest CPUID function number supported.
            _cpuid(registers, 0x0);
            const unsigned highestFunction = registers[0];
            if (highestFunction < 1)
            {
                return; // function 0x1 is unavailable; leave the flag at false
            }

            // Function 0x1: ECX bit 23 is the POPCNT feature flag.
            _cpuid(registers, 0x1);
            bPopCntSupported = ((registers[2] & 0x800000) != 0);
        }
    };
    // Perform one-time initialization on module startup.
    InitBitData _initBitData;


    /////////////////////////////////////////////////////////////////////////////////////////////////////////
    //	private methods
    /////////////////////////////////////////////////////////////////////////////////////////////////////////

    // Forward declaration: SetFullChunk() falls back to SetPartialChunk(), which is defined after it.
    void SetPartialChunk(BitData* const	dst, const uint64_t	dstStart, const uint32_t size, const UIntBase value);

    // Initialises 'bd' with a freshly allocated, zero-filled chunk vector.
    //   bd       - OUT: BitData to set up; bd->obj receives the heap-allocated vector and
    //                   bd->buffer points at that vector's storage.
    //   size     - IN:  logical size in bits.
    //   capacity - IN:  capacity in bits; storage is rounded up to whole chunks.
    void _BDV_Create(
	    BitData* const	bd,
	    uint64_t size,
	    uint64_t capacity
    )
    {
        bd->bitsize = size;
        bd->bitcapacity = capacity;

        // Number of chunks needed to hold 'capacity' bits, rounded up.
        const uint32_t chunkCount = (uint32_t)((capacity + c_i4ChunkSize - 1) / c_i4ChunkSize);

        std::vector<UIntBase>* const storage = new std::vector<UIntBase>(chunkCount);
        bd->buffer = storage->data();
        bd->obj = storage;
    }

    // Reads one full chunk (c_i4ChunkSize bits) beginning at an arbitrary bit offset.
    // Performs no validation; callers must guarantee 'src' and the addressed range are valid.
    UIntBase GetFullChunk(			// RET: chunk from bitfield
	    const BitData* const	src,		// IN:	source
	    const uint64_t			srcStart	// IN:	offset of first source bit
    )
    {
        // Hot path inside this module: deliberately no assertions here.

        // First byte holding requested bits, and the bit position inside that byte.
        const uint8_t* const firstByte = (const uint8_t*)src->buffer + (srcStart >> 3);
        const int shift = srcStart & 7;

        const UIntBase lowPart = *(const UIntBase*)firstByte;
        if (shift == 0)
        {
            return lowPart; // byte aligned: the chunk can be read directly
        }

        // Not byte aligned: the top 'shift' bits come from the byte following the chunk-sized read.
        const UIntBase spillByte = (UIntBase)firstByte[sizeof(UIntBase)];
        return (lowPart >> shift) | (spillByte << (c_i4ChunkSize - shift));
    }

    // Reads 'size' bits (fewer than one chunk) beginning at an arbitrary bit offset and
    // returns them right-aligned; all bits above 'size' in the result are zero.
    // Performs no validation; callers must guarantee 'src' and the addressed range are valid.
    UIntBase GetPartialChunk(				// RET: chunk from bitfield
	    const BitData* const	src,		// IN:	source
	    const uint64_t			start,		// IN:	offset of first source bit
	    const uint64_t			size		// IN:	size of bitfield (must be < chunk size)
    )
    {
	    // this gets called a lot within this module -- for performance reasons do not assert on assumptions
	    // (have verified all clients within this module cannot pass in a NULL pointer)

	    // Zero-initialised: the copy below may fill only some of the bytes, and the value is
	    // shifted afterwards, so every byte must hold a defined value.
	    UIntBase ulRetVal = 0;

	    // pointer to first byte containing src bits
	    const unsigned char* c_po1Src = (const unsigned char*)src->buffer + (start >> 3);

        const uint32_t c_nBitOffset = start & 7; //The offset of the first bit that we care about within the first byte of the buffer
        const uint32_t c_nNumLowerBits = 8 - c_nBitOffset; //Number of bits that we care about in the first byte of the buffer
        if (size > c_nNumLowerBits)
        {
            // The field spans more than the first byte: copy the following bytes, then
            // slide them up to make room for the bits taken from the first byte.
            CommonUtils::MemoryCopy(reinterpret_cast<uint8_t*>(&ulRetVal), sizeof(ulRetVal), &c_po1Src[1], BitsToUint8s(size - c_nNumLowerBits));
            ulRetVal = (ulRetVal << c_nNumLowerBits) | (c_po1Src[0] >> c_nBitOffset);
        }
        else
        {
            // The whole field lives inside the first byte.
            ulRetVal = c_po1Src[0] >> c_nBitOffset;
        }

	    // Discard everything above the requested field width.
	    return ulRetVal & ((c_iOne << size) - 1);
    }

    // Writes one full chunk (c_i4ChunkSize bits) at an arbitrary bit offset.
    // Performs no validation; callers must guarantee 'dst' and the addressed range are valid.
    inline void SetFullChunk(
        BitData* const	dst,		// OUT:	destination
        const uint64_t	dstStart,	// IN:	offset of first destination bit
        const UIntBase	value		// IN:  chunk value to write
    )
    {
        // Hot path inside this module: deliberately no assertions here.

        UIntBase* const lowChunk = (UIntBase*)dst->buffer + (dstStart >> c_i4ChunkShift);
        const int shift = (int)(dstStart & c_o4ChunkMask);

        if (shift == 0)
        {
            // Chunk aligned: a single store does the job.
            *lowChunk = value;
            return;
        }

        if (dst->bitcapacity - dstStart < c_i4ChunkSize)
        {
            // Less than one chunk of capacity remains past dstStart, so the two-chunk
            // read-modify-write below is not used; write byte-wise instead.
            SetPartialChunk(dst, dstStart, c_i4ChunkSize, value);
            return;
        }

        // The value straddles two chunks: keep the low 'shift' bits of the first chunk
        // and everything above the low 'shift' bits of the second.
        UIntBase* const highChunk = lowChunk + 1;
        const UIntBase lowBitsMask = (c_iOne << shift) - 1;
        *lowChunk = (value << shift) | (*lowChunk & lowBitsMask);
        *highChunk = (value >> (c_i4ChunkSize - shift)) | (*highChunk & ~lowBitsMask);
    }

    // Writes the low 'size' bits of 'value' into 'dst' starting at bit 'dstStart',
    // one byte at a time, preserving all destination bits outside the written range.
    // Performs no validation; callers must guarantee 'dst' and the addressed range are valid.
    void SetPartialChunk(
	    BitData* const	dst,		// OUT:	destination
	    const uint64_t	dstStart,	// IN:	offset of first destination bit
	    const uint32_t	size,		// IN:	number of bits in chunk
	    const UIntBase	value		// IN:  partial chunk value to write
    )
    {
	    // this gets called a lot within this module -- for performance reasons do not assert on assumptions
	    // (have verified all clients within this module cannot pass in a NULL pointer)

        uint8_t* ptrByte = (dstStart>>3) + reinterpret_cast<uint8_t*>(dst->buffer); // start byte is the start byte offset plus the buffer start
	    uint32_t count = size;  // bits still to be written
	    UIntBase val = value;   // remaining bits to write, next bit in position 0

	    // copy any partial start byte
	    if (dstStart & 0x7)
	    {
		    uint8_t byteBitOffset = (dstStart & 0x7);   // position of the first written bit within the start byte
            uint8_t byteBits = 8 - byteBitOffset;       // bits available from that position to the end of the byte
            if (byteBits > size)
            {
			    // The whole field fits strictly inside this byte: replace only the masked bits.
			    uint8_t o1Mask = ((1 << size) - 1) << byteBitOffset;
			    *ptrByte =  (*ptrByte & ~o1Mask) | ((val << byteBitOffset) & o1Mask);
                count = 0;
            }
            else
            {
        	    // Keep the low byteBitOffset bits of the byte and fill the rest from 'val'.
        	    *ptrByte = ((uint8_t)(*ptrByte << byteBits) >> byteBits) | (static_cast<uint8_t>(val) << byteBitOffset);
                ptrByte++;
			    val >>= byteBits;
                count -= byteBits;
            }
        }

	    // copy any full bytes
	    while (count >= 8)
	    {
		    *ptrByte = static_cast<uint8_t>(val);
		    ptrByte++;
		    val >>= 8;
		    count -= 8;
	    }

	    // copy any partial end byte
	    if (count)
	    {
		    // Keep the high (8 - count) bits of the byte, overwrite the low 'count' bits.
		    *ptrByte = (*ptrByte&(0xff<<count)) | (val&((1<<count) - 1));
	    }
    }

    // Estimates how many bits are needed to hold the number written in 'strValue'.
    //   "0x" + hex digits    : 4 bits per hex digit
    //   "0b" + binary digits : 1 bit per binary digit
    //   decimal digits       : ceil(log2(10) * digit count)
    // Counting stops at the first character that is not a valid digit for the radix.
    // Any other input triggers BD_ASSERTFAIL and yields 0.
    uint64_t _StringValueSize(const char* strValue)
    {
        const size_t charCount = strlen(strValue);

        // Both radix prefixes require a leading '0' and at least one character after the prefix.
        const bool hasRadixPrefix = (charCount > 2) && (strValue[0] == '0');

        if (hasRadixPrefix && (strValue[1] == 'x'))
        {
            uint64_t pos = 2;
            for (; pos < charCount; ++pos)
            {
                const char ch = strValue[pos];
                const bool isHexDigit = ((ch >= '0') && (ch <= '9'))
                                     || ((ch >= 'a') && (ch <= 'f'))
                                     || ((ch >= 'A') && (ch <= 'F'));
                if (!isHexDigit)
                {
                    break;
                }
            }
            return ((uint64_t)(pos - 2)) << 2;
        }

        if (hasRadixPrefix && (strValue[1] == 'b'))
        {
            uint64_t pos = 2;
            for (; pos < charCount; ++pos)
            {
                const char ch = strValue[pos];
                if ((ch != '0') && (ch != '1'))
                {
                    break;
                }
            }
            return (pos - 2);
        }

        if ((charCount > 0) && (strValue[0] >= '0') && (strValue[0] <= '9'))
        {
            // The first character is already known to be a digit.
            uint64_t digitCount = 1;
            while ((digitCount < charCount) && (strValue[digitCount] >= '0') && (strValue[digitCount] <= '9'))
            {
                ++digitCount;
            }
            return (uint64_t)ceil(LOG_2_10 * digitCount);
        }

        // Not a recognisable number; the macro returns 0 from this function.
        BD_ASSERTFAIL("Bad param values", 0);
    }

    // Adds a 32-bit value to the BitData in place, treating its buffer as a
    // multi-word integer stored least-significant 32-bit word first.
    // A carry out of the last word is discarded.
    void _AddUInt32(
	    BitData* bd,
	    uint32_t o4Value
	    )
    {
        uint32_t* const words = (uint32_t*)bd->buffer;
        const uint32_t wordCount = BitsToUint32s(bd->bitsize);

        // Starts as the addend; after each word it holds the carry into the next one.
        uint64_t carry = o4Value;
        uint32_t index = 0;
        while ((index < wordCount) && ((uint32_t)carry != 0))
        {
            const uint64_t sum = carry + words[index];
            words[index] = (uint32_t)sum;
            carry = sum >> 32;
            ++index;
        }
    }

    // Multiplies the BitData in place by a 32-bit value, treating its buffer as a
    // multi-word integer stored least-significant 32-bit word first.
    // A carry out of the last word is discarded.
    void _MultiplyByUInt32(
	    BitData* bd,
	    uint32_t o4Value
	    )
    {
        uint32_t* const words = (uint32_t*)bd->buffer;
        const uint32_t o4Chunks = BitsToUint32s(bd->bitsize);

        // Low 32 bits: the word to store.  High 32 bits: carry into the next word.
        uint64_t o8PartialSum = 0;
        for (uint32_t i = 0; i < o4Chunks; i++)
        {
            o8PartialSum += (words[i] * (uint64_t)o4Value);
            words[i] = (uint32_t)o8PartialSum;
            // Plain shift, matching _AddUInt32.  Reading the upper half through a
            // uint32_t* cast depends on byte order and breaks the aliasing rules.
            o8PartialSum >>= 32;
        }
    }

    // Divides the BitData in place by a 32-bit value (schoolbook long division, most
    // significant 32-bit word first) and returns the remainder.
    //   bd      - IN/OUT: dividend on entry, quotient on return.
    //   o4Value - IN:     divisor; must not be zero (not checked here).
    // If bitsize is not a multiple of 32, only the valid bits of the top word are
    // read and written.
    uint32_t _DivideByUInt32(
	    BitData* bd,
	    uint32_t o4Value
	    )
    {
        // 'high' carries the running remainder, 'low' receives the next word of the dividend.
        split_uint64 o8Dividend = {0};
        const int32_t o4Chunks = (int32_t)BitsToUint32s(bd->bitsize);
	    uint32_t unalignedBitsInChunk = bd->bitsize & 0x1f;
        for (int32_t i = o4Chunks - 1; i >= 0; i--)
        {
		    if (unalignedBitsInChunk != 0)
		    {
			    // Widen before shifting: 'i << 5' in 32-bit signed arithmetic overflows
			    // for very large BitData.
			    const uint64_t chunkBitOffset = static_cast<uint64_t>(i) << 5;
			    o8Dividend.part.low = (uint32_t)GetPartialChunk(bd, chunkBitOffset, unalignedBitsInChunk);
			    SetPartialChunk(bd, chunkBitOffset, unalignedBitsInChunk, (uint32_t)(o8Dividend.value / o4Value));
			    unalignedBitsInChunk = 0; // only the top word can be partial
		    }
		    else
		    {
			    o8Dividend.part.low = ((uint32_t*)bd->buffer)[i];
			    ((uint32_t*)bd->buffer)[i] = (uint32_t)(o8Dividend.value / o4Value);
		    }
		    // The remainder becomes the high half of the next dividend.
		    o8Dividend.part.high = (uint32_t)(o8Dividend.value % o4Value);
		    o8Dividend.part.low = 0;
        }

        return o8Dividend.part.high;
    }

    // Returns true when 'src' is non-null, has a buffer, and the bit range
    // [srcStart, srcStart + count) lies entirely within src->bitsize.
    // The range check is written so that srcStart + count cannot wrap around.
    inline bool _BitDataFieldOk(
	    const BitData* const	src,		// IN:	source
	    const uint64_t			srcStart,	// IN:  location of first source bit
	    const uint64_t			count		// IN:	number of bits in the field
    )
    {
	    return (src != nullptr)
	        && (src->buffer != nullptr)
	        && (srcStart <= src->bitsize)
	        && (count <= src->bitsize - srcStart);
    }
} // end namespace Anonymous

/////////////////////////////////////////////////////////////////////////////////////////////////////////
//	public access capability
/////////////////////////////////////////////////////////////////////////////////////////////////////////

/// Returns the bit at 'offset'.
/// @param bd      BitData to read from; must be non-null with a non-null buffer.
/// @param offset  Zero-based bit index; must be less than bd->bitsize.
/// @return The bit value, or false when a precondition fails.
bool BitData_GetBit(
    const BitData* bd,
	uint64_t       offset
	)
{
	BD_ASSERT(bd != nullptr, "The BitData pointer passed in is null.", false);
	BD_ASSERT(bd->buffer != nullptr, "The BitData buffer is null.", false);
	// Valid indices are 0 .. bitsize-1; offset == bitsize is already out of bounds.
	BD_ASSERT(bd->bitsize > offset, "The offset specified is pass the bounds of the BitData.", false);

	return (((uint8_t*)(bd->buffer))[offset >> 3] >> (offset & 0x7)) & 1;
}

/// Sets or clears the bit at 'offset'.
/// @param bd      BitData to modify; must be non-null with a non-null buffer.  The pointer
///                is const-qualified, but the buffer it refers to is written.
/// @param offset  Zero-based bit index; must be less than bd->bitsize.
/// @param bit     true to set the bit, false to clear it.
/// Does nothing when a precondition fails.
void BitData_SetBit(
    const BitData* bd,
	uint64_t       offset,
	bool           bit
	)
{
	BD_ASSERT(bd != nullptr, "The BitData pointer passed in is null.");
	BD_ASSERT(bd->buffer != nullptr, "The BitData buffer is null.");
	// Valid indices are 0 .. bitsize-1; offset == bitsize is already out of bounds.
	BD_ASSERT(bd->bitsize > offset, "The offset specified is pass the bounds of the BitData.");

	uint8_t* byteptr = (uint8_t*)(bd->buffer) + (offset >> 3);
	if (bit)
	{
		*byteptr |= (1<<(int)(offset & 0x7));
	}
	else
	{
		*byteptr &= ~(1<<(int)(offset & 0x7));
	}
}

/// Reads up to 32 bits starting at 'offset'.
/// When at least a full chunk is available at 'offset', the low 32 bits of that chunk
/// are returned; otherwise the bits remaining up to bd->bitsize are returned
/// (truncated to 32 bits).
/// @return The value read, or 0 when 'offset' is at or beyond the end of the BitData.
uint32_t BitData_GetUInt32(
    const BitData*	bd,
	uint64_t		offset
	)
{
	const uint64_t totalBits = bd->bitsize;

	if (offset + c_i4ChunkSize <= totalBits)
	{
		return (uint32_t)GetFullChunk(bd, offset);
	}

	if (offset < totalBits)
	{
		return (uint32_t)GetPartialChunk(bd, offset, totalBits - offset);
	}

	// Nothing to read; the macro returns 0 from this function.
	BD_ASSERTFAIL("The offset specified is too large to retrieve 32 bits from the BitData.", 0);
}

/// Reads 'size' bits (0..64) starting at bit 'start' and returns them right-aligned.
/// Returns 0 when a precondition fails or when 'size' is greater than 64.
uint64_t BitData_GetUpToUInt64(
	const BitData* const	src,		// IN:	source
	const uint64_t			start,		// IN:	offset of first source bit
	const uint32_t			size		// IN:	the amount of bits to retrieve (must be <= 64 bits)
	)
{
	BD_ASSERT(src != nullptr, "The BitData pointer passed in is null.", 0);
	BD_ASSERT(src->bitsize >= start, "The start specified is pass the bounds of the BitData.", 0);
	BD_ASSERT(src->bitsize >= (start + size), "Trying to access more bits than are available in the BitData.", 0);

	uint64_t value = 0;
	if (size == c_i4ChunkSize)
	{
		// Exactly one native chunk (64 bits with HOST_X64, otherwise 32 bits).
		value = GetFullChunk(src, start);
	}
	else if (size < c_i4ChunkSize)
	{
		value = GetPartialChunk(src, start, size);
	}
#ifndef HOST_X64
	// 32-bit chunks: sizes in the range 33..64 are assembled from two chunk reads.
	else if (size == 64)
	{
		value = GetFullChunk(src, start) | (((uint64_t)GetFullChunk(src, start + 32)) << 32);
	}
	else if (size < 64)
	{
		value = GetFullChunk(src, start) | (((uint64_t)GetPartialChunk(src, start + 32, size - 32)) << 32);
	}
#endif
	else
	{
		BD_ASSERTFAIL("The size specified is greater than 64 bits.", 0);
	}

	return value;
}

/// Writes the low 'size' bits (0..64) of 'value' into 'bd' starting at bit 'start'.
/// Does nothing when a precondition fails or when 'size' is greater than 64.
void BitData_SetUpToUInt64(
	BitData* const	bd,		// OUT:	destination
	const uint64_t	start,	// IN:	offset of first destination bit
	const uint32_t	size,	// IN:	number of bits to write (must be <= 64 bits)
	const uint64_t	value	// IN:  value whose low 'size' bits are written
	)
{
	BD_ASSERT(bd != nullptr, "The BitData pointer passed in is null.");
	BD_ASSERT(bd->bitsize >= start, "The start specified is pass the bounds of the BitData.");
	BD_ASSERT(bd->bitsize >= (start + size), "Trying to access more bits than are available in the BitData.");

	if (size == c_i4ChunkSize)
	{
		// Exactly one native chunk (64 bits with HOST_X64, otherwise 32 bits).
		SetFullChunk(bd, start, (UIntBase)value);
	}
	else if (size < c_i4ChunkSize)
	{
		SetPartialChunk(bd, start, size, (UIntBase)value);
	}
#ifndef HOST_X64
	// 32-bit chunks: sizes in the range 33..64 are written as a low chunk plus an upper part.
	else if (size == 64)
	{
		SetFullChunk(bd, start, (UIntBase)value);
		SetFullChunk(bd, start + 32, (UIntBase)(value >> 32));
	}
	else if (size < 64)
	{
		SetFullChunk(bd, start, (UIntBase)value);
		SetPartialChunk(bd, start + 32, size - 32, (UIntBase)(value >> 32));
	}
#endif
	else
	{
		BD_ASSERTFAIL("The size specified is greater than 64 bits.");
	}
}

/// Reads up to 64 bits starting at 'offset'.  When fewer than 64 bits remain before
/// bd->bitsize, only the remaining bits are read and the upper bits of the result are zero.
/// Returns 0 when 'offset' is at or beyond the end of the BitData.
/// NOTE: 'bd' is dereferenced without a null check.
uint64_t BitData_GetUInt64(
    const BitData*	bd,
	uint64_t			offset
	)
{
	uint64_t value = 0;
	if (offset + 64 <= bd->bitsize)
	{
		// All 64 bits are available.
#ifdef HOST_X64
		value = GetFullChunk(bd, offset);
#else
		// 32-bit chunks: combine two full chunks.
		value = GetFullChunk(bd, offset) | (((uint64_t)GetFullChunk(bd, offset + 32)) << 32);
	}
	else if (offset + 32 < bd->bitsize)
	{
		// 32-bit chunks only: between 33 and 63 bits remain.
		value = GetFullChunk(bd, offset) | (((uint64_t)GetPartialChunk(bd, offset + 32, bd->bitsize - (offset + 32))) << 32);
	}
	else if (offset + 32 == bd->bitsize)
	{
		// 32-bit chunks only: exactly 32 bits remain.
		value = GetFullChunk(bd, offset);
#endif
	}
	else if (offset < bd->bitsize)
	{
		// Fewer bits than one chunk remain.
		value = GetPartialChunk(bd, offset, bd->bitsize - offset);
	}
	else
	{
		BD_ASSERTFAIL("The offset specified is too large to retrieve 64 bits from the BitData.", 0);
	}

	return value;
}

/// Copies bits from 'bd', beginning at bit 'start', into a caller-supplied byte array.
/// @param bd             Source BitData (dereferenced without a null check).
/// @param start          Offset of the first bit to copy; must be less than bd->bitsize.
/// @param bytearray      Destination buffer.
/// @param bytearraysize  Size of the destination buffer in bytes.
/// Copies min(bd->bitsize - start, bytearraysize * 8) bits.  Destination bytes that
/// receive no data at all are set to zero.  Does nothing when 'start' is out of range.
void BitData_GetBuffer(
    const BitData*	bd,
	uint64_t		start,
	uint8_t*		bytearray,
	uint32_t		bytearraysize
	)
{
	if (start < bd->bitsize)
	{
		UIntBase* copyToPtr = (UIntBase*)bytearray;
		uint64_t chunkstart = start;
		// The number of bits to copy is either the number of bits in the BitData to be copied or
		// the number of bits available in the array. It is the smaller of the two.
		// The byte count is widened BEFORE the shift so arrays of 512 MiB or more do not
		// overflow 32-bit arithmetic.
		uint64_t numBitsToCopy = std::min((bd->bitsize - start), static_cast<uint64_t>(bytearraysize) << 3);
		uint32_t numBytesToCopy = BitsToUint8s(numBitsToCopy);
		uint32_t numBytesToZero = (bytearraysize > numBytesToCopy) ? bytearraysize - numBytesToCopy : 0;
        uint32_t bytearrayRemainingSize = bytearraysize;

		// If we are on a byte boundary, copy all the full bytes possible.
		if (((start & 7) == 0) && (numBitsToCopy > 7))
		{
			uint32_t numMemCopyBytes = static_cast<uint32_t>(numBitsToCopy >> 3);
			uint64_t numMemCopyBits = static_cast<uint64_t>(numMemCopyBytes) << 3;
			uint64_t startByte = start >> 3;
            CommonUtils::MemoryCopy(reinterpret_cast<uint8_t*>(copyToPtr), bytearrayRemainingSize, reinterpret_cast<uint8_t*>(bd->buffer) + startByte, numMemCopyBytes);

			// Advance the destination pointer past the bytes just copied.
			copyToPtr = (UIntBase*)(bytearray + numMemCopyBytes);
            bytearrayRemainingSize -= numMemCopyBytes;
			chunkstart += numMemCopyBits;    // Increase the start index by the number of bits copied.
			numBitsToCopy -= numMemCopyBits; // Decrease the number of bits to copy by the number of bits copied.
		}

		// Copy whatever is left (everything, for an unaligned start) chunk by chunk.
		while (numBitsToCopy != 0)
		{
			if (numBitsToCopy >= c_i4ChunkSize)
			{
				*copyToPtr++ = GetFullChunk(bd, chunkstart);
                bytearrayRemainingSize -= BitsToUint8s(c_i4ChunkSize);
				numBitsToCopy -= c_i4ChunkSize;
				chunkstart += c_i4ChunkSize;
			}
			else
			{
				// Final partial chunk: copy only the bytes that hold requested bits.
				UIntBase chunk = GetPartialChunk(bd, chunkstart, numBitsToCopy);
                CommonUtils::MemoryCopy(copyToPtr, bytearrayRemainingSize, &chunk, BitsToUint8s(numBitsToCopy));
                bytearrayRemainingSize -= BitsToUint8s(numBitsToCopy);
				numBitsToCopy = 0;
			}
		}

		// If the size of the BitData is not large enough to fill all the
		// bytes in the array then zero out all the upper bytes remaining.
		if (numBytesToZero != 0)
		{
			uint32_t byteIndex = bytearraysize - numBytesToZero;
			memset((void*) (bytearray + byteIndex), 0, numBytesToZero);
		}
	}
	else
	{
		BD_ASSERTFAIL("The start specified is pass the bounds of the BitData.");
	}
}

void BitData_ReadIndices(
	const BitData*	    bd,
    const IndexList*    indices,
	BitData*            value
)
{
	BD_ASSERT(bd != nullptr && value != nullptr, "The BitData pointer passed in is null.");
	BD_ASSERT(indices != nullptr, "The IndexList pointer passed in is null.");

    uint64_t offset = 0;
    size_t length = 0;
    if (IndexList_ExtractOffsetAndLength(indices, &offset, &length))
    {
        BitData_Copy(bd, offset, value, 0, length);
    }
    else
    {
        size_t indexCount = IndexList_GetLength(indices);
        if (indexCount <= bd->bitsize && indexCount <= value->bitsize)
        {
            IndexListIterator iterator = {};
            while (IndexList_MoveNext(indices, &iterator))
            {
                bool bit = BitData_GetBit(bd, iterator.index);
                BitData_SetBit(value, iterator.position, bit);
            }
        }
        else
        {
            BD_ASSERTFAIL("The IndexList and offset specified is passed the bounds of the BitData.");
        }
    }
}

/// Scatters the bits of 'value' into the positions of 'bd' selected by 'indices'.
/// When the index list reduces to a single (offset, length) range the bits are
/// block-copied from bit 0 of 'value'; otherwise each bit is copied individually from
/// the iterator's position in 'value' to the indexed bit of 'bd'.
void BitData_WriteIndices(
	BitData*            bd,
    const IndexList*    indices,
	const BitData*      value
)
{
	BD_ASSERT(bd != nullptr && value != nullptr, "The BitData pointer passed in is null.");
	BD_ASSERT(indices != nullptr, "The IndexList pointer passed in is null.");

    uint64_t rangeOffset = 0;
    size_t rangeLength = 0;
    if (IndexList_ExtractOffsetAndLength(indices, &rangeOffset, &rangeLength))
    {
        // Fast path: one block copy.
        BitData_Copy(value, 0, bd, rangeOffset, rangeLength);
        return;
    }

    const size_t indexCount = IndexList_GetLength(indices);
    if (indexCount > bd->bitsize || indexCount > value->bitsize)
    {
        // The macro returns from this function.
        BD_ASSERTFAIL("The IndexList and offset specified is passed the bounds of the BitData.");
    }

    // Slow path: move one bit at a time.
    IndexListIterator cursor = {};
    while (IndexList_MoveNext(indices, &cursor))
    {
        const bool bit = BitData_GetBit(value, cursor.position);
        BitData_SetBit(bd, cursor.index, bit);
    }
}

/// Writes up to 32 bits of 'value' at 'offset'.  When fewer than 32 bits remain before
/// bd->bitsize only the remaining bits are written.
/// Does nothing when 'offset' is at or beyond the end of the BitData.
void BitData_SetUInt32(
    BitData*		bd,
	uint64_t			offset,
	uint32_t			value
	)
{
	if (offset >= bd->bitsize)
	{
		// The macro returns from this function.
		BD_ASSERTFAIL("The offset specified is pass the bounds of the BitData.");
	}

	const uint64_t bitsAvailable = bd->bitsize - offset;
	const uint32_t bitsToWrite = (uint32_t)((bitsAvailable < 32) ? bitsAvailable : 32);

	if (bitsToWrite != c_i4ChunkSize)
	{
		SetPartialChunk(bd, offset, bitsToWrite, value);
	}
	else
	{
		// Only reachable when a chunk is exactly 32 bits wide.
		SetFullChunk(bd, offset, value);
	}
}

/// Writes up to 64 bits of 'value' at 'offset'.  When fewer than 64 bits remain before
/// bd->bitsize only the remaining bits are written.
/// Does nothing when 'offset' is at or beyond the end of the BitData.
void BitData_SetUInt64(
    BitData*		bd,
	uint64_t			offset,
	uint64_t			value
	)
{
	if (offset >= bd->bitsize)
	{
		// The macro returns from this function.
		BD_ASSERTFAIL("The offset specified is pass the bounds of the BitData.");
	}

	const uint64_t bitsAvailable = bd->bitsize - offset;
	const uint32_t bitsToWrite = (uint32_t)((bitsAvailable < 64) ? bitsAvailable : 64);

	if (bitsToWrite < c_i4ChunkSize)
	{
		// Less than one chunk (32 or 64 bits) to write.
		SetPartialChunk(bd, offset, bitsToWrite, (UIntBase)value);
		return;
	}

	// At least one whole chunk: write the low chunk first.
	SetFullChunk(bd, offset, (UIntBase)value);

#ifndef HOST_X64
	// 32-bit chunks: the upper half of the value may still need to be written.
	const uint32_t upperBits = bitsToWrite - c_i4ChunkSize;
	const UIntBase upperValue = (UIntBase)(value >> c_i4ChunkSize);
	if (upperBits == c_i4ChunkSize)
	{
		// All 64 bits are being written.
		SetFullChunk(bd, offset + c_i4ChunkSize, upperValue);
	}
	else if (upperBits != 0)
	{
		// More than 32 but fewer than 64 bits are being written.
		SetPartialChunk(bd, offset + c_i4ChunkSize, upperBits, upperValue);
	}
#endif
}

/// Copies the contents of a byte array into 'bd' starting at bit 'start'.
/// @param bd             Destination BitData (dereferenced without a null check).
/// @param start          Offset of the first destination bit.
/// @param bytearray      Source bytes.
/// @param bytearraysize  Number of source bytes; all bytearraysize * 8 bits must fit
///                       between 'start' and bd->bitsize.
/// Does nothing when the data does not fit.
void BitData_SetBuffer(
    BitData*		bd,
	uint64_t		start,
	const uint8_t*	bytearray,
	uint32_t		bytearraysize
	)
{
	// Widen before shifting so large arrays do not overflow 32-bit arithmetic.
	const uint64_t totalBits = static_cast<uint64_t>(bytearraysize) << 3;

	// Written so that start + totalBits cannot wrap around.
	if ((start <= bd->bitsize) && (totalBits <= bd->bitsize - start))
	{
		if ((start & 7) == 0)
		{
			// Byte-aligned destination (low three bits of the bit offset are zero):
			// a straight byte copy is correct.
            CommonUtils::MemoryCopy(reinterpret_cast<uint8_t*>(bd->buffer) + (start >> 3), BitsToUint8s(bd->bitsize - start), bytearray, bytearraysize);
		}
		else
		{
			uint64_t count = totalBits;
			uint64_t chunkstart = start;
			const uint8_t* srcBytes = bytearray;

			while (count != 0)
			{
				// Stage each chunk through a local so the source is never read past its
				// end and no alignment is assumed for 'bytearray'.
				UIntBase chunk = 0;
				if (count >= c_i4ChunkSize)
				{
					memcpy(&chunk, srcBytes, sizeof(chunk));
					SetFullChunk(bd, chunkstart, chunk);
					srcBytes += sizeof(chunk);
					count -= c_i4ChunkSize;
					chunkstart += c_i4ChunkSize;
				}
				else
				{
					// 'count' is a multiple of 8 here: only whole source bytes remain.
					memcpy(&chunk, srcBytes, static_cast<size_t>(count >> 3));
					SetPartialChunk(bd, chunkstart, (uint32_t)count, chunk);
					count = 0;
				}
			}
		}
	}
	else
	{
		BD_ASSERTFAIL("There is not enough room in the BitData to copy the contents of the buffer into it.");
	}
}

/// Finds the lowest set bit in the field [start, start + size) of 'bd'.
/// @param poffset  OUT: position of the first set bit RELATIVE TO 'start', or
///                 c_o4NoBitSet when no bit is set or the parameters are invalid.
///                 Written before any validation, so it must be non-null.
void BitData_FindFirst(
	const BitData*	bd,
	uint64_t		start,
	uint64_t		size,
	uint64_t*		poffset
)
{
    *poffset = c_o4NoBitSet;
	if (_BitDataFieldOk(bd, start, size))
	{
		uint64_t count = size;          // bits not yet examined
		UIntBase chunk = 0;
		uint64_t chunkstart = start;    // absolute bit offset of the chunk being examined

		// Scan upwards a chunk at a time until a non-zero chunk is found.
		while ((count != 0) && (c_o4NoBitSet == *poffset))
		{
			if (count >= c_i4ChunkSize)
			{
				chunk = GetFullChunk(bd, chunkstart);
				count -= c_i4ChunkSize;
			}
			else
			{
				chunk = GetPartialChunk(bd, chunkstart, count);
				count = 0;
			}

			if (chunk != 0)
			{
				// Isolate the lowest set bit (x & -x).  MSVC warning 4146 (unary minus
				// applied to an unsigned type) is suppressed around the expression.
#ifdef _MSC_VER
#pragma warning( disable : 4146 )
#endif
	            chunk &= (UIntBase)(-chunk);
#ifdef _MSC_VER
#pragma warning( default : 4146 )
#endif
				// Multiplying the isolated bit by 0x077cb531 and keeping the top five bits
				// gives an index into sc_o4FindFirstLookup, which holds the bit position.
#ifdef HOST_X64
                if ((chunk & 0xffffffff) != 0)
                {
                    // The bit is in the low 32 bits of the 64-bit chunk.
                    *poffset = (chunkstart - start) + sc_o4FindFirstLookup[(int)((((uint32_t)chunk) * 0x077cb531) >> 27)];
                }
                else
                {
                    // The bit is in the high 32 bits: look it up there and add 32.
                    *poffset = (chunkstart - start) + sc_o4FindFirstLookup[(int)((((uint32_t)(chunk >> 32)) * 0x077cb531) >> 27)] + 32;
                }
#else
                *poffset = (chunkstart - start) + sc_o4FindFirstLookup[(int)((((uint32_t)chunk) * 0x077cb531) >> 27)];
#endif
			}
			else
			{
				// Nothing set in this chunk; move on to the next one.
				chunkstart += c_i4ChunkSize;
			}
		}
	}
	else
	{
		BD_ASSERTFAIL("Bad param values");
	}
}

/// Finds the highest set bit in the field [start, start + size) of 'bd'.
/// @param poffset  OUT: position of the last set bit RELATIVE TO 'start', or
///                 c_o4NoBitSet when no bit is set or the parameters are invalid.
///                 Written before any validation, so it must be non-null.
void BitData_FindLast(
	const BitData*	bd,
	uint64_t			start,
	uint64_t			size,
	uint64_t*			poffset
)
{
    *poffset = c_o4NoBitSet;
	if (_BitDataFieldOk(bd, start, size))
	{
		// Bits not yet examined.  The field is scanned from its top end downwards, so
		// after each step 'count' is also the field-relative offset of the chunk just read.
		uint64_t count = size;
		UIntBase chunk = 0;

		while ((count != 0) && (c_o4NoBitSet == *poffset))
		{
			if (count >= c_i4ChunkSize)
			{
				count -= c_i4ChunkSize;
				chunk = GetFullChunk(bd, start + count);
			}
			else
			{
				// Fewer bits than one chunk remain at the bottom of the field.
				chunk = GetPartialChunk(bd, start, count);
				count = 0;
			}

			if (chunk != 0)
			{
				// Index of the highest set bit within the chunk.
				// NOTE: only assigned when one of the HOST_* platform macros below is defined.
				unsigned long offset;
#ifdef HOST_X64
    #if defined(HOST_WINDOWS)
				_BitScanReverse64(&offset, chunk);
    #elif defined(HOST_LINUX) || defined(HOST_DARWIN)
				offset = 63 - __builtin_clzll(chunk);
    #endif

#else
    #if defined(HOST_WINDOWS)
				_BitScanReverse(&offset, chunk);
    #elif defined(HOST_LINUX) || defined(HOST_DARWIN)
				offset = 31 - __builtin_clz(chunk);
    #endif

#endif
				*poffset = count + offset;
				// Disabled earlier implementation (table lookup instead of bit-scan intrinsics),
				// kept for reference:
//				chunk |= chunk >> 1;
//				chunk |= chunk >> 2;
//				chunk |= chunk >> 4;
//				chunk |= chunk >> 8;
//				chunk |= chunk >> 16;
//#ifdef HOST_X64
//				chunk |= chunk >> 32;
//#endif
//				chunk = (chunk >> 1) + 1;
//
//#ifdef HOST_X64
//                if ((chunk & 0xffffffff) != 0)
//                {
//                    *poffset = count + sc_o4FindFirstLookup[(int)((((uint32_t)chunk) * 0x077cb531) >> 27)];
//                }
//                else
//                {
//                    *poffset = count + sc_o4FindFirstLookup[(int)((((uint32_t)(chunk >> 32)) * 0x077cb531) >> 27)] + 32;
//                }
//#else
//                *poffset = count + sc_o4FindFirstLookup[(int)((((uint32_t)chunk) * 0x077cb531) >> 27)];
//#endif
			}
		}
	}
	else
	{
		BD_ASSERTFAIL("Bad param values");
	}
}

/// Counts the set bits in the field [start, start + size) of 'bd'.
/// @param pbits  OUT: number of set bits.  Left untouched when the parameters are invalid.
void BitData_PopCount(
	const BitData*	bd,
	uint64_t			start,
	uint64_t			size,
	uint64_t*			pbits
)
{
	if (_BitDataFieldOk(bd, start, size))
	{
		uint64_t bits = 0;      // running total
		uint64_t count = size;  // bits not yet examined
		UIntBase chunk = 0;

		while (count != 0)
		{
			if (count >= c_i4ChunkSize)
			{
				chunk = GetFullChunk(bd, start);
				start += c_i4ChunkSize;
				count -= c_i4ChunkSize;
			}
			else
			{
				chunk = GetPartialChunk(bd, start, count);
				count = 0;
			}

			if (chunk != 0)
			{
				// The hardware POPCNT path is only compiled for MSVC and only taken when
				// bPopCntSupported is set; every other case uses the arithmetic fallback.
#ifdef _MSC_VER
				if (bPopCntSupported)
				{
#ifdef HOST_X64
					bits += __popcnt64(chunk);
#else
					bits += __popcnt(chunk);
#endif
                }
				else
#endif
				{
					// Parallel bit count: sum adjacent bits into 2-bit fields, then 4-bit
					// fields, then per-byte counts, and add the bytes up with one multiply
					// (the total ends up in the top byte).
#ifdef HOST_X64
					chunk = chunk - ((chunk >> 1) & 0x5555555555555555);
					chunk = (chunk & 0x3333333333333333) + ((chunk >> 2) & 0x3333333333333333);
					bits += ((((chunk + (chunk >> 4)) & 0x0F0F0F0F0F0F0F0F) * 0x0101010101010101) >> 56);
#else
					chunk = chunk - ((chunk >> 1) & 0x55555555);
					chunk = (chunk & 0x33333333) + ((chunk >> 2) & 0x33333333);
					bits += ((((chunk + (chunk >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24);
#endif
				}
			}
		}

		*pbits = bits;
	}
	else
	{
		BD_ASSERTFAIL("Bad param values");
	}
}

void BitData_Parity(
	const BitData*	bd,
	uint64_t			start,
	uint64_t			size,
	bool*			ppar
)
{
	if (_BitDataFieldOk(bd, start, size))
	{
		uint64_t count = size;
		UIntBase chunk = 0;

		while (count != 0)
		{
			if (count >= c_i4ChunkSize)
			{
				chunk ^= GetFullChunk(bd, start);
				start += c_i4ChunkSize;
				count -= c_i4ChunkSize;
			}
			else
			{
				chunk ^= GetPartialChunk(bd, start, count);
				count = 0;
			}
		}

#ifdef HOST_X64
		chunk ^= (chunk >> 32);
#endif
		chunk ^= (chunk >> 16);
		chunk ^= (chunk >> 8);

		*ppar = (bool)((0x6996 >> (int)((chunk ^ (chunk >> 4)) & 0xf)) & 1);
	}
	else
	{
		BD_ASSERTFAIL("Bad param values");
	}
}

void BitData_IsValue(
	const BitData*	bd,
	uint64_t			start,
	uint64_t			size,
	uint64_t			value,
	bool*			psame
)
{
	if (_BitDataFieldOk(bd, start, size))
	{
		*psame = true;
		uint64_t count = size;
		UIntBase chunk = 0;
#ifndef HOST_X64
		bool bUpper = false;
		uint32_t upperval = (uint32_t)(value >> 32);
#endif

		while ((count != 0) && *psame)
		{
			if (count >= c_i4ChunkSize)
			{
				chunk = GetFullChunk(bd, start);
#ifdef HOST_X64
				*psame = (chunk == value);
#else
				*psame = (chunk == (bUpper ? upperval : (uint32_t)value));
#endif
				start += c_i4ChunkSize;
				count -= c_i4ChunkSize;
			}
			else
			{
				chunk = GetPartialChunk(bd, start, count);
#ifdef HOST_X64
				*psame = ((chunk ^ value) & ((c_iOne<<count) - 1)) == 0;
#else
				*psame = ((chunk ^ (bUpper ? upperval : (uint32_t)value)) & ((c_iOne<<count) - 1)) == 0;
#endif
				count = 0;
			}
#ifndef HOST_X64
			bUpper = !bUpper;
#endif
		}
	}
	else
	{
		BD_ASSERTFAIL("Bad param values");
	}
}

/// @brief XORs a repeating 64-bit value into a bit field, in place.
///
/// The field is processed chunk by chunk from @p start upwards.  On 64-bit
/// hosts every chunk is XOR-ed with @p value; on 32-bit hosts successive
/// chunks are XOR-ed alternately with the lower and the upper 32 bits.
///
/// @param bd    BitData that contains the field; modified in place.
/// @param start Bit offset of the first bit of the field.
/// @param size  Number of bits in the field.
/// @param value Pattern to XOR into the field.
void BitData_XorValue(
	BitData*	bd,
	uint64_t		start,
	uint64_t		size,
	uint64_t		value
)
{
	if (!_BitDataFieldOk(bd, start, size))
	{
		BD_ASSERTFAIL("Bad param values");
		return;
	}

	uint64_t position = start;
	uint64_t remaining = size;
#ifndef HOST_X64
	bool useUpperHalf = false;
	const uint32_t upperHalf = (uint32_t)(value >> 32);
#endif

	while (remaining != 0)
	{
		// pattern applied to this particular chunk
#ifdef HOST_X64
		const uint64_t pattern = value;
#else
		const uint32_t pattern = useUpperHalf ? upperHalf : (uint32_t)value;
		useUpperHalf = !useUpperHalf;
#endif

		if (remaining < c_i4ChunkSize)
		{
			// trailing bits that do not fill a whole chunk
			SetPartialChunk(bd, position, (uint32_t)remaining, GetPartialChunk(bd, position, remaining) ^ pattern);
			remaining = 0;
		}
		else
		{
			SetFullChunk(bd, position, GetFullChunk(bd, position) ^ pattern);
			position += c_i4ChunkSize;
			remaining -= c_i4ChunkSize;
		}
	}
}

/// @brief Fills a bit field with a repeating 64-bit value.
///
/// When the field starts on a byte boundary and @p value is all zeros or all
/// ones, the whole bytes of the field are filled with a single memset and only
/// the leftover bits go through the chunk loop.  Otherwise the field is
/// written chunk by chunk from @p start upwards; on 32-bit hosts successive
/// chunks receive the lower and the upper 32 bits of @p value alternately.
///
/// @param bd    BitData that contains the field; modified in place.
/// @param start Bit offset of the first bit of the field.
/// @param size  Number of bits in the field.
/// @param value Pattern to write.
void BitData_InitValue(
	BitData*	bd,
	uint64_t		start,
	uint64_t		size,
	uint64_t		value
)
{
	if (!_BitDataFieldOk(bd, start, size))
	{
		BD_ASSERTFAIL("Bad param values");
		return;
	}

	uint64_t position = start;
	uint64_t remaining = size;
#ifndef HOST_X64
	bool useUpperHalf = false;
	const uint32_t upperHalf = (uint32_t)(value >> 32);
#endif

	// removable optimization:
	// a byte-aligned fill with 0 or -1 (the common cases) can flash-init all
	// of its whole bytes with memset; both halves of such a value are equal,
	// so the 32-bit half selector does not need adjusting afterwards
	const bool startsOnByte = ((start & 0x7) == 0);
	const bool uniformPattern = (value == 0) || (value == 0xFFFFFFFFFFFFFFFF);
	if (startsOnByte && uniformPattern)
	{
		const size_t wholeBytes = (size_t)(size >> 3);
		const size_t bitsFilled = wholeBytes << 3;
		memset(((uint8_t*)bd->buffer) + (start >> 3), (int)value, wholeBytes);
		position += bitsFilled;
		remaining -= bitsFilled;
	}

	while (remaining != 0)
	{
		// pattern written to this particular chunk
#ifdef HOST_X64
		const uint64_t pattern = value;
#else
		const uint32_t pattern = useUpperHalf ? upperHalf : (uint32_t)value;
		useUpperHalf = !useUpperHalf;
#endif

		if (remaining < c_i4ChunkSize)
		{
			SetPartialChunk(bd, position, (uint32_t)remaining, pattern);
			remaining = 0;
		}
		else
		{
			SetFullChunk(bd, position, pattern);
			position += c_i4ChunkSize;
			remaining -= c_i4ChunkSize;
		}
	}
}

/// @brief Overwrites a bit field with pseudo-random data taken from rand().
///
/// One chunk-sized random value is assembled per iteration from 16-bit slices
/// of successive rand() results and written to the field from @p start
/// upwards.  The sequence depends on the state of the C library generator
/// (srand); this function does not seed it.
///
/// NOTE(review): where RAND_MAX is 0x7FFF the top bit of every 16-bit slice is
/// always zero -- confirm whether that bias matters to callers.
///
/// @param bd    BitData that contains the field; modified in place.
/// @param start Bit offset of the first bit of the field.
/// @param size  Number of bits in the field.
void BitData_Randomize(
	BitData*	bd,
	uint64_t		start,
	uint64_t		size
)
{
	if (!_BitDataFieldOk(bd, start, size))
	{
		BD_ASSERTFAIL("Bad param values");
		return;
	}

	uint64_t position = start;
	uint64_t remaining = size;

	while (remaining != 0)
	{
		// build one chunk worth of random bits
		UIntBase noise;
#ifdef HOST_X64
		noise = (((((((uint64_t)(uint16_t)rand()) << 16) | (uint16_t)rand()) << 16) | (uint16_t)rand()) << 16) | (uint16_t)rand();
#else
		noise = (((uint32_t)rand()) << 16) | (uint16_t)rand();
#endif

		if (remaining < c_i4ChunkSize)
		{
			// trailing bits: only the low 'remaining' bits are stored
			SetPartialChunk(bd, position, (uint32_t)remaining, noise);
			remaining = 0;
		}
		else
		{
			SetFullChunk(bd, position, noise);
			position += c_i4ChunkSize;
			remaining -= c_i4ChunkSize;
		}
	}
}

/// @brief Generates an ECC value for a bit field from a per-bit mask table.
///
/// For every bit of the field that is set, the corresponding entry of
/// @p ao4EccMasks is XOR-ed into the result.
///
/// @param bd          BitData that contains the field.
/// @param start       Bit offset of the first bit of the field.
/// @param size        Number of bits in the field.
/// @param ao4EccMasks Table with one mask per field bit; entry i belongs to
///                    bit (start + i), so at least @p size entries are read.
/// @return XOR of the masks of all set bits; 0 when no bit is set or the field
///         description is rejected.
uint32_t BitData_GenerateECC(
	const BitData*	bd,
	uint64_t			start,
	uint64_t			size,
    uint32_t*			ao4EccMasks
)
{
    uint32_t o4ECC = 0;

	if (_BitDataFieldOk(bd, start, size))
	{
		const uint8_t* bytes = (const uint8_t*)(bd->buffer);

		// for each data bit in the bitfield...
		// (the index is 64 bits wide: a 32-bit index truncates 'start' and can
		// never reach 'start + size' once the field extends past bit 2^32)
		for (uint64_t o8Offset = 0; o8Offset < size; o8Offset++)
		{
			const uint64_t o8DataBit = start + o8Offset;

			// if the bit is on, xor-into the cumulative the ECC mask for that bit
			if ((bytes[o8DataBit >> 3] >> (o8DataBit & 0x7)) & 1)
			{
				o4ECC ^= ao4EccMasks[o8Offset];
			}
		}
	}
	else
	{
		BD_ASSERTFAIL("Bad param values", 0);
	}

    return o4ECC;
}

/// @brief Fills in a CRC definition, including its 256-entry lookup table.
///
/// The table is built for the byte-at-a-time, right-shifting update used by
/// BitData_GenerateCRC.  For a non-reflected-input CRC the initial value and
/// the table entries are therefore stored bit-reversed.
///
/// @param crcDef         Definition to initialise; must not be null.
/// @param o4Width        CRC width in bits; must be in the range 1..32.
/// @param o4Polynomial   Generator polynomial, in the low @p o4Width bits.
/// @param o4InitialValue Initial CRC register value.
/// @param bReflectIn     True when input bytes are processed LSB first.
/// @param bReflectOut    Stored as-is in the definition.
/// @param o4XorOut       Stored as-is in the definition.
void BitData_InitCrcDef(
	OpenIPC_CrcDef* crcDef,
	uint32_t o4Width,
	uint32_t o4Polynomial,
	uint32_t o4InitialValue,
	bool bReflectIn,
	bool bReflectOut,
	uint32_t o4XorOut
)
{
	// The width is used as (32 - o4Width) in several shifts below; a width of
	// 0 or of more than 32 would make those shift counts out of range, which
	// is undefined behaviour, so such widths are rejected up front.
	if ((crcDef != nullptr) && (o4Width >= 1) && (o4Width <= 32))
	{
		crcDef->m_o4Width = o4Width;
		crcDef->m_o4InitialValue = o4InitialValue;
		if (!bReflectIn)
		{
			// the running CRC is kept bit-reversed, so reverse the start value too
			BitData temp = BitData_CreateLocalFromBuffer(o4Width, o4Width, &(crcDef->m_o4InitialValue));
			BitData_Reverse(&temp, 0, o4Width);
		}

		crcDef->m_bReflectIn = bReflectIn;
		crcDef->m_bReflectOut = bReflectOut;
		crcDef->m_o4XorOut = o4XorOut;

		uint32_t o4Crc;
		const uint32_t o4UnusedBits = 32 - o4Width;
		const uint32_t o4WidthMask = 0xffffffffu >> o4UnusedBits;

		if (bReflectIn)
		{
			// reverse all the bits in the polynomial and right-align it
			BitData temp = BitData_CreateLocalFromBuffer(32, 32, &o4Polynomial);
			BitData_Reverse(&temp, 0, 32);
			o4Polynomial >>= o4UnusedBits;

			for (int i = 0; i < 256; i++)
			{
				o4Crc = (uint32_t)i;

				// run the byte through the shift register, LSB first
				for (int iPos = 0; iPos < 8; iPos++)
				{
					o4Crc = (o4Crc >> 1) ^ (((o4Crc & 0x1) != 0) ? o4Polynomial : 0);
				}

				crcDef->m_ao4CrcTable[i] = o4Crc & o4WidthMask;
			}
		}
		else
		{
			const uint32_t o4TopBit = 0x80000000u;

			// left-align the polynomial so the register's MSB is bit 31
			o4Polynomial <<= o4UnusedBits;

			for (int i = 0; i < 256; i++)
			{
				// reverse the bits of the index byte and move it to the top
				o4Crc = (uint32_t)i;
				o4Crc = ((o4Crc & 0xf0f0f0f0) >> 4) | ((o4Crc & 0x0f0f0f0f) << 4);
				o4Crc = ((o4Crc & 0xCCCCCCCC) >> 2) | ((o4Crc & 0x33333333) << 2);
				o4Crc = ((o4Crc & 0xAAAAAAAA) >> 1) | ((o4Crc & 0x55555555) << 1);
				o4Crc <<= 24;

				// run the byte through the shift register, MSB first
				for (int iPos = 0; iPos < 8; iPos++)
				{
					o4Crc = (o4Crc << 1) ^ (((o4Crc & o4TopBit) != 0) ? o4Polynomial : 0);
				}

				// store the entry bit-reversed
				BitData temp = BitData_CreateLocalFromBuffer(32, 32, &o4Crc);
				BitData_Reverse(&temp, 0, 32);
				crcDef->m_ao4CrcTable[i] = o4Crc & o4WidthMask;
			}
		}
	}
	else
	{
		BD_ASSERTFAIL("Bad param values");
	}
}

/// @brief Computes the CRC of a bit field using a prepared CRC definition.
///
/// The field is consumed 32 bits at a time from @p start upwards and fed
/// through the definition's lookup table one byte at a time, low byte first.
/// A final piece shorter than 32 bits contributes BitsToUint8s(bits) bytes.
/// For a non-reflected-input definition the bits of every byte are reversed
/// before the table lookup; for a non-reflected-output definition the final
/// register is bit-reversed over the CRC width.
///
/// @param bd     BitData that contains the field.
/// @param start  Bit offset of the first bit of the field.
/// @param size   Number of bits in the field.
/// @param crcDef CRC definition; it is dereferenced unconditionally.
/// @return The CRC register XOR-ed with the definition's XorOut value.
uint32_t BitData_GenerateCRC(
	const BitData*	bd,
	uint64_t			start,
	uint64_t			size,
    const OpenIPC_CrcDef*	crcDef
)
{
	uint32_t o4Register = crcDef->m_o4InitialValue;
	if (_BitDataFieldOk(bd, start, size))
	{
		uint64_t position = start;
		uint64_t remaining = size;

		while (remaining > 0)
		{
			uint32_t o4Word;
			int32_t i4ByteCount;

			if (remaining < 32)
			{
				// final, short piece of the field
				o4Word = (uint32_t)GetPartialChunk(bd, position, remaining);
				i4ByteCount = BitsToUint8s(remaining);
				remaining = 0;
			}
			else
			{
	#ifdef HOST_X64
				o4Word = (uint32_t)GetPartialChunk(bd, position, 32);
	#else
				o4Word = GetFullChunk(bd, position);
	#endif
				position += 32;
				remaining -= 32;
				i4ByteCount = 4;
			}

			if (!crcDef->m_bReflectIn)
			{
				// quickly reverse all bits in the bytes of the DWord
				o4Word = ((o4Word & 0xf0f0f0f0) >> 4) | ((o4Word & 0x0f0f0f0f) << 4);
				o4Word = ((o4Word & 0xCCCCCCCC) >> 2) | ((o4Word & 0x33333333) << 2);
				o4Word = ((o4Word & 0xAAAAAAAA) >> 1) | ((o4Word & 0x55555555) << 1);
			}

			// table-driven update, one byte per step
			for (; i4ByteCount != 0; --i4ByteCount)
			{
				o4Register = crcDef->m_ao4CrcTable[(o4Register ^ o4Word) & 0xFF] ^ (o4Register >> 8);
				o4Word >>= 8;
			}
		}

		if (!crcDef->m_bReflectOut)
		{
			BitData temp = BitData_CreateLocalFromBuffer(crcDef->m_o4Width, crcDef->m_o4Width, &o4Register);
			BitData_Reverse(&temp, 0, crcDef->m_o4Width);
		}
	}
	else
	{
		BD_ASSERTFAIL("Bad param values", 0);
	}

    return crcDef->m_o4XorOut ^ o4Register;
}

/// @brief Byte-swaps consecutive 2-, 4- or 8-byte elements in place.
///
/// 16-bit elements are swapped by hand; 32- and 64-bit elements use the
/// compiler's byte-swap intrinsic for the host platform (on a build where none
/// of HOST_WINDOWS / HOST_LINUX / HOST_DARWIN is defined they are left as-is).
///
/// @param bd          BitData whose buffer is modified.
/// @param startByte   Byte offset of the first element.
/// @param numElements Number of elements to swap.
/// @param byteWidth   Size of one element in bytes: 2, 4 or 8.
void BitData_ReverseEndianess(
	BitData*	bd,
	uint64_t		startByte,
	uint64_t		numElements,
	uint32_t		byteWidth
)
{
	if (!_BitDataFieldOk(bd, startByte << 3, (numElements << 3) * byteWidth))
	{
		BD_ASSERTFAIL("Bad param values");
		return;
	}

	// for non-VC compilers, see http://stackoverflow.com/questions/105252/how-do-i-convert-between-big-endian-and-little-endian-values-in-c
	// WORD-sized swapping looks faster doing it manually

	uint8_t* const firstByte = (uint8_t*)bd->buffer + startByte;

	switch (byteWidth)
	{
	case 2:
		{
			uint16_t* element = (uint16_t*)firstByte;
			for (uint64_t left = numElements; left != 0; --left, ++element)
			{
				const uint8_t* raw = (const uint8_t*)element;
				*element = (uint16_t)((raw[0] << 8) | raw[1]);
			}
		}
		break;

	case 4:
		{
			uint32_t* element = (uint32_t*)firstByte;
			for (uint64_t left = numElements; left != 0; --left, ++element)
			{
#if defined(HOST_WINDOWS)
				*element = _byteswap_ulong(*element);
#elif defined(HOST_LINUX) || defined(HOST_DARWIN)
				*element = __builtin_bswap32(*element);
#endif
			}
		}
		break;

	case 8:
		{
			uint64_t* element = (uint64_t*)firstByte;
			for (uint64_t left = numElements; left != 0; --left, ++element)
			{
#if defined(HOST_WINDOWS)
				*element = _byteswap_uint64(*element);
#elif defined(HOST_LINUX) || defined(HOST_DARWIN)
				*element = __builtin_bswap64(*element);
#endif
			}
		}
		break;

	default:
		BD_ASSERTFAIL("Bad param values");
		break;
	}
}

void BitData_Reverse(
	BitData*	bd,
	uint64_t		start,
	uint64_t		size
)
{
	if (_BitDataFieldOk(bd, start, size))
	{
        uint64_t i8Remaining = size;
        uint64_t nChunkStart = start;
        uint64_t nChunkBStart = start + (size - c_i4ChunkSize);

        while (i8Remaining > 1)
        {
            // get the next chunk
            if (i8Remaining >= c_i4ChunkSize)
            {
                UIntBase o4ChunkA = GetFullChunk(bd, nChunkStart);

                // reverse all the bits in the chunk
#ifdef HOST_X64
                o4ChunkA = (o4ChunkA >> 32) | (o4ChunkA << 32);
                o4ChunkA = ((o4ChunkA & 0xffff0000ffff0000) >> 16) | ((o4ChunkA & 0x0000ffff0000ffff) << 16);
                o4ChunkA = ((o4ChunkA & 0xff00ff00ff00ff00) >> 8) | ((o4ChunkA & 0x00ff00ff00ff00ff) << 8);
                o4ChunkA = ((o4ChunkA & 0xf0f0f0f0f0f0f0f0) >> 4) | ((o4ChunkA & 0x0f0f0f0f0f0f0f0f) << 4);
                o4ChunkA = ((o4ChunkA & 0xCCCCCCCCCCCCCCCC) >> 2) | ((o4ChunkA & 0x3333333333333333) << 2);
                o4ChunkA = ((o4ChunkA & 0xAAAAAAAAAAAAAAAA) >> 1) | ((o4ChunkA & 0x5555555555555555) << 1);
#else
                o4ChunkA = (o4ChunkA >> 16) | (o4ChunkA << 16);
                o4ChunkA = ((o4ChunkA & 0xff00ff00) >> 8) | ((o4ChunkA & 0x00ff00ff) << 8);
                o4ChunkA = ((o4ChunkA & 0xf0f0f0f0) >> 4) | ((o4ChunkA & 0x0f0f0f0f) << 4);
                o4ChunkA = ((o4ChunkA & 0xCCCCCCCC) >> 2) | ((o4ChunkA & 0x33333333) << 2);
                o4ChunkA = ((o4ChunkA & 0xAAAAAAAA) >> 1) | ((o4ChunkA & 0x55555555) << 1);
#endif
				if (nChunkStart != nChunkBStart)
                {
	                UIntBase o4ChunkB = GetFullChunk(bd, nChunkBStart);

                    // reverse all the bits in the chunk
#ifdef HOST_X64
                    o4ChunkB = (o4ChunkB >> 32) | (o4ChunkB << 32);
                    o4ChunkB = ((o4ChunkB & 0xffff0000ffff0000) >> 16) | ((o4ChunkB & 0x0000ffff0000ffff) << 16);
                    o4ChunkB = ((o4ChunkB & 0xff00ff00ff00ff00) >> 8) | ((o4ChunkB & 0x00ff00ff00ff00ff) << 8);
                    o4ChunkB = ((o4ChunkB & 0xf0f0f0f0f0f0f0f0) >> 4) | ((o4ChunkB & 0x0f0f0f0f0f0f0f0f) << 4);
                    o4ChunkB = ((o4ChunkB & 0xCCCCCCCCCCCCCCCC) >> 2) | ((o4ChunkB & 0x3333333333333333) << 2);
                    o4ChunkB = ((o4ChunkB & 0xAAAAAAAAAAAAAAAA) >> 1) | ((o4ChunkB & 0x5555555555555555) << 1);
#else
                    o4ChunkB = (o4ChunkB >> 16) | (o4ChunkB << 16);
                    o4ChunkB = ((o4ChunkB & 0xff00ff00) >> 8) | ((o4ChunkB & 0x00ff00ff) << 8);
                    o4ChunkB = ((o4ChunkB & 0xf0f0f0f0) >> 4) | ((o4ChunkB & 0x0f0f0f0f) << 4);
                    o4ChunkB = ((o4ChunkB & 0xCCCCCCCC) >> 2) | ((o4ChunkB & 0x33333333) << 2);
                    o4ChunkB = ((o4ChunkB & 0xAAAAAAAA) >> 1) | ((o4ChunkB & 0x55555555) << 1);
#endif
					SetFullChunk(bd, nChunkStart, o4ChunkB);
                }

				SetFullChunk(bd, nChunkBStart, o4ChunkA);

                i8Remaining = (i8Remaining > (c_i4ChunkSize << 1)) ? i8Remaining - (c_i4ChunkSize << 1) : 0;
                nChunkBStart -= c_i4ChunkSize;
            }
            else
            {
                UIntBase o4Chunk = GetPartialChunk(bd, nChunkStart, (int)i8Remaining);

                // reverse all the bits in the chunk
#ifdef HOST_X64
                    o4Chunk = (o4Chunk >> 32) | (o4Chunk << 32);
                    o4Chunk = ((o4Chunk & 0xffff0000ffff0000) >> 16) | ((o4Chunk & 0x0000ffff0000ffff) << 16);
                    o4Chunk = ((o4Chunk & 0xff00ff00ff00ff00) >> 8) | ((o4Chunk & 0x00ff00ff00ff00ff) << 8);
                    o4Chunk = ((o4Chunk & 0xf0f0f0f0f0f0f0f0) >> 4) | ((o4Chunk & 0x0f0f0f0f0f0f0f0f) << 4);
                    o4Chunk = ((o4Chunk & 0xCCCCCCCCCCCCCCCC) >> 2) | ((o4Chunk & 0x3333333333333333) << 2);
                    o4Chunk = ((o4Chunk & 0xAAAAAAAAAAAAAAAA) >> 1) | ((o4Chunk & 0x5555555555555555) << 1);
#else
                    o4Chunk = (o4Chunk >> 16) | (o4Chunk << 16);
                    o4Chunk = ((o4Chunk & 0xff00ff00) >> 8) | ((o4Chunk & 0x00ff00ff) << 8);
                    o4Chunk = ((o4Chunk & 0xf0f0f0f0) >> 4) | ((o4Chunk & 0x0f0f0f0f) << 4);
                    o4Chunk = ((o4Chunk & 0xCCCCCCCC) >> 2) | ((o4Chunk & 0x33333333) << 2);
                    o4Chunk = ((o4Chunk & 0xAAAAAAAA) >> 1) | ((o4Chunk & 0x55555555) << 1);
#endif
                SetPartialChunk(bd, nChunkStart, (int)i8Remaining, (o4Chunk >> (64 - (int)i8Remaining)));

                i8Remaining = 0;
            }

            nChunkStart += c_i4ChunkSize;
        }
	}
	else
	{
		BD_ASSERTFAIL("Bad param values");
	}
}

/// @brief Shifts the contents of a bit field towards higher bit positions.
///
/// The low (size - count) bits of the field are moved up by @p count
/// positions and the vacated low @p count bits are cleared.  Shifting by the
/// field size or more clears the whole field.
///
/// @param bd    BitData that contains the field; modified in place.
/// @param start Bit offset of the first bit of the field.
/// @param size  Number of bits in the field.
/// @param count Number of bit positions to shift by.
void BitData_ShiftLeft(
	BitData*	bd,
	uint64_t		start,
	uint64_t		size,
	uint64_t		count
)
{
	if (!_BitDataFieldOk(bd, start, size))
	{
		BD_ASSERTFAIL("Bad param values");
		return;
	}

	const bool someBitsSurvive = (count < size);
	if (someBitsSurvive)
	{
		BitData_Copy(bd, start, bd, start + count, (size - count));
	}

	// never clear more than the field itself
	const uint64_t vacated = someBitsSurvive ? count : size;
	BitData_InitValue(bd, start, vacated, 0);
}

/// @brief Shifts the contents of a bit field towards lower bit positions.
///
/// The high (size - count) bits of the field are moved down by @p count
/// positions and the vacated high @p count bits are cleared.  Shifting by the
/// field size or more clears the whole field.
///
/// @param bd    BitData that contains the field; modified in place.
/// @param start Bit offset of the first bit of the field.
/// @param size  Number of bits in the field.
/// @param count Number of bit positions to shift by.
void BitData_ShiftRight(
	BitData*	bd,
	uint64_t		start,
	uint64_t		size,
	uint64_t		count
)
{
	if (!_BitDataFieldOk(bd, start, size))
	{
		BD_ASSERTFAIL("Bad param values");
		return;
	}

	const bool someBitsSurvive = (count < size);
	if (someBitsSurvive)
	{
		BitData_Copy(bd, start + count, bd, start, (size - count));
	}

	// never clear more than the field itself
	const uint64_t vacated = someBitsSurvive ? count : size;
	BitData_InitValue(bd, start + (size - vacated), vacated, 0);
}

/// @brief Copies a run of bits from one BitData to another (or within one).
///
/// Source and destination may overlap.  The copy direction is chosen from the
/// absolute bit addresses of the two runs so that no source bit is overwritten
/// before it has been read: bottom-up when the destination lies below the
/// source, top-down otherwise.
///
/// When both start positions are byte aligned and at least one chunk's worth
/// of bits is copied, the whole bytes are transferred with one memmove and
/// only the leftover bits go through the chunk routines.
///
/// @param src      BitData to read from.
/// @param srcStart Bit offset of the first source bit.
/// @param dst      BitData to write to.
/// @param dstStart Bit offset of the first destination bit.
/// @param count    Number of bits to copy.
void BitData_Copy(
	const BitData*	src,
	uint64_t			srcStart,
	BitData*		dst,
	uint64_t			dstStart,
	uint64_t			count
)
{
	if (_BitDataFieldOk(src, srcStart, count) && _BitDataFieldOk(dst, dstStart, count))
	{
		// special case most common (single bit) usage for performance
		if (count == 1)
		{
			if ((((uint8_t*)src->buffer)[srcStart>>3] >> (srcStart&0x7))&1)
			{
				((uint8_t*)dst->buffer)[dstStart>>3] |= (1<<(dstStart&0x7));
			}
			else
			{
				((uint8_t*)dst->buffer)[dstStart>>3] &= ~(1<<(dstStart&0x7));
			}
		}
		else
		{
			// if dstStart and srcStart are byte aligned and at least a chunk's
			// length of bits must be copied, the whole bytes go through memmove
			size_t nBulkBytes = 0;
			if ((count >= c_i4ChunkSize) && (((dstStart | srcStart) & 0x7) == 0))
			{
				nBulkBytes = (size_t)(count >> 3);
			}
			const uint64_t nBulkBits = ((uint64_t)nBulkBytes) << 3;

			uint8_t* const pDstBulk = ((uint8_t*)dst->buffer) + (dstStart>>3);
			const uint8_t* const pSrcBulk = ((uint8_t*)src->buffer) + (srcStart>>3);

			// bits that are left for the chunk routines
			uint64_t nCount = count - nBulkBits;

			// if source bit addr is greater than destination bit addr, copy from bottom up, otherwise copy from top down
			if ((((uint64_t)(dst->buffer)*8) + dstStart) < (((uint64_t)(src->buffer)*8) + srcStart))
			{
				// bottom up: whole bytes first, then the bits above them
				if (nBulkBytes != 0)
				{
					memmove(pDstBulk, pSrcBulk, nBulkBytes);
				}

				uint64_t nSrcPos = srcStart + nBulkBits;
				uint64_t nDstPos = dstStart + nBulkBits;

				// transfer remaining chunks
				while (nCount)
				{
					if (nCount >= c_i4ChunkSize)
					{
						SetFullChunk(dst, nDstPos, GetFullChunk(src, nSrcPos));
						nCount -= c_i4ChunkSize;
						nDstPos += c_i4ChunkSize;
						nSrcPos += c_i4ChunkSize;
					}
					else
					{
						SetPartialChunk(dst, nDstPos, (uint32_t)nCount, GetPartialChunk(src, nSrcPos, nCount));
						nCount = 0;
					}
				}
			}
			else
			{
				// top down: the bits above the whole bytes must be copied
				// BEFORE the memmove, otherwise an overlapping move towards
				// higher addresses overwrites those source bits before they
				// have been read
				uint64_t nSrcPos = srcStart + count;
				uint64_t nDstPos = dstStart + count;

				// transfer remaining chunks
				while (nCount)
				{
					if (nCount >= c_i4ChunkSize)
					{
						nDstPos -= c_i4ChunkSize;
						nSrcPos -= c_i4ChunkSize;
						SetFullChunk(dst, nDstPos, GetFullChunk(src, nSrcPos));
						nCount -= c_i4ChunkSize;
					}
					else
					{
						nDstPos -= nCount;
						nSrcPos -= nCount;
						SetPartialChunk(dst, nDstPos, (uint32_t)nCount, GetPartialChunk(src, nSrcPos, nCount));
						nCount = 0;
					}
				}

				if (nBulkBytes != 0)
				{
					memmove(pDstBulk, pSrcBulk, nBulkBytes);
				}
			}
		}
	}
	else
	{
		BD_ASSERTFAIL("Bad param values");
	}
}

void BitData_Compare(
	const BitData*	bd1,
	uint64_t			start1,
	const BitData*	bd2,
	uint64_t			start2,
	uint64_t			size,
	int32_t*			presult
)
{
	if (_BitDataFieldOk(bd1, start1, size) && _BitDataFieldOk(bd2, start2, size))
	{
		*presult = 0;
		uint64_t count = size;
		UIntBase chunk1 = 0;
		UIntBase chunk2 = 0;

		while ((count != 0) && (0 == *presult))
		{
			if (count >= c_i4ChunkSize)
			{
				count -= c_i4ChunkSize;
				chunk1 = GetFullChunk(bd1, start1 + count);
				chunk2 = GetFullChunk(bd2, start2 + count);
			}
			else
			{
				chunk1 = GetPartialChunk(bd1, start1, count);
				chunk2 = GetPartialChunk(bd2, start2, count);
				count = 0;
			}

			if (chunk1 != chunk2)
			{
				*presult = (chunk1 > chunk2) ? 1 : -1;
			}
		}
	}
	else
	{
		BD_ASSERTFAIL("Bad param values");
	}
}

/// @brief Writes the bitwise OR of two bit fields into a destination field.
///
/// Whole chunks are processed from the top of the fields downwards; the
/// leftover low bits are handled last as one partial chunk.
///
/// @param dest      BitData that receives the result.
/// @param deststart Bit offset of the destination field.
/// @param src1      BitData that contains the first operand.
/// @param src1start Bit offset of the first operand.
/// @param src2      BitData that contains the second operand.
/// @param src2start Bit offset of the second operand.
/// @param size      Number of bits in each field.
void BitData_BitwiseOr(
	BitData*		dest,
	uint64_t			deststart,
	const BitData*	src1,
	uint64_t			src1start,
	const BitData*	src2,
	uint64_t			src2start,
	uint64_t			size
)
{
	const bool fieldsOk = _BitDataFieldOk(src1, src1start, size) && _BitDataFieldOk(src2, src2start, size) && _BitDataFieldOk(dest, deststart, size);
	if (!fieldsOk)
	{
		BD_ASSERTFAIL("Bad param values");
		return;
	}

	uint64_t remaining = size;

	// whole chunks, highest first
	while (remaining >= c_i4ChunkSize)
	{
		remaining -= c_i4ChunkSize;
		const UIntBase lhs = GetFullChunk(src1, src1start + remaining);
		const UIntBase rhs = GetFullChunk(src2, src2start + remaining);
		SetFullChunk(dest, deststart + remaining, lhs | rhs);
	}

	// low bits that do not fill a chunk
	if (remaining != 0)
	{
		const UIntBase lhs = GetPartialChunk(src1, src1start, remaining);
		const UIntBase rhs = GetPartialChunk(src2, src2start, remaining);
		SetPartialChunk(dest, deststart, (uint32_t)remaining, lhs | rhs);
	}
}

/// @brief Writes the bitwise AND of two bit fields into a destination field.
///
/// Whole chunks are processed from the top of the fields downwards; the
/// leftover low bits are handled last as one partial chunk.
///
/// @param dest      BitData that receives the result.
/// @param deststart Bit offset of the destination field.
/// @param src1      BitData that contains the first operand.
/// @param src1start Bit offset of the first operand.
/// @param src2      BitData that contains the second operand.
/// @param src2start Bit offset of the second operand.
/// @param size      Number of bits in each field.
void BitData_BitwiseAnd(
	BitData*		dest,
	uint64_t			deststart,
	const BitData*	src1,
	uint64_t			src1start,
	const BitData*	src2,
	uint64_t			src2start,
	uint64_t			size
)
{
	const bool fieldsOk = _BitDataFieldOk(src1, src1start, size) && _BitDataFieldOk(src2, src2start, size) && _BitDataFieldOk(dest, deststart, size);
	if (!fieldsOk)
	{
		BD_ASSERTFAIL("Bad param values");
		return;
	}

	uint64_t remaining = size;

	// whole chunks, highest first
	while (remaining >= c_i4ChunkSize)
	{
		remaining -= c_i4ChunkSize;
		const UIntBase lhs = GetFullChunk(src1, src1start + remaining);
		const UIntBase rhs = GetFullChunk(src2, src2start + remaining);
		SetFullChunk(dest, deststart + remaining, lhs & rhs);
	}

	// low bits that do not fill a chunk
	if (remaining != 0)
	{
		const UIntBase lhs = GetPartialChunk(src1, src1start, remaining);
		const UIntBase rhs = GetPartialChunk(src2, src2start, remaining);
		SetPartialChunk(dest, deststart, (uint32_t)remaining, lhs & rhs);
	}
}

/// @brief Writes the bitwise XOR of two bit fields into a destination field.
///
/// Whole chunks are processed from the top of the fields downwards; the
/// leftover low bits are handled last as one partial chunk.
///
/// @param dest      BitData that receives the result.
/// @param deststart Bit offset of the destination field.
/// @param src1      BitData that contains the first operand.
/// @param src1start Bit offset of the first operand.
/// @param src2      BitData that contains the second operand.
/// @param src2start Bit offset of the second operand.
/// @param size      Number of bits in each field.
void BitData_BitwiseXor(
	BitData*		dest,
	uint64_t			deststart,
	const BitData*	src1,
	uint64_t			src1start,
	const BitData*	src2,
	uint64_t			src2start,
	uint64_t			size
)
{
	const bool fieldsOk = _BitDataFieldOk(src1, src1start, size) && _BitDataFieldOk(src2, src2start, size) && _BitDataFieldOk(dest, deststart, size);
	if (!fieldsOk)
	{
		BD_ASSERTFAIL("Bad param values");
		return;
	}

	uint64_t remaining = size;

	// whole chunks, highest first
	while (remaining >= c_i4ChunkSize)
	{
		remaining -= c_i4ChunkSize;
		const UIntBase lhs = GetFullChunk(src1, src1start + remaining);
		const UIntBase rhs = GetFullChunk(src2, src2start + remaining);
		SetFullChunk(dest, deststart + remaining, lhs ^ rhs);
	}

	// low bits that do not fill a chunk
	if (remaining != 0)
	{
		const UIntBase lhs = GetPartialChunk(src1, src1start, remaining);
		const UIntBase rhs = GetPartialChunk(src2, src2start, remaining);
		SetPartialChunk(dest, deststart, (uint32_t)remaining, lhs ^ rhs);
	}
}

/// @brief Merges two bit fields under control of a mask field.
///
/// Each destination bit is taken from @p src2 where the mask bit is 1 and
/// from @p src1 where the mask bit is 0.  Whole chunks are processed from the
/// top of the fields downwards; the leftover low bits are handled last as one
/// partial chunk.
///
/// @param dest      BitData that receives the result.
/// @param deststart Bit offset of the destination field.
/// @param src1      BitData selected where the mask is 0.
/// @param src1start Bit offset of the first operand.
/// @param src2      BitData selected where the mask is 1.
/// @param src2start Bit offset of the second operand.
/// @param mask      BitData that contains the selection mask.
/// @param maskstart Bit offset of the mask field.
/// @param size      Number of bits in each field.
void BitData_BitwiseSelect(
	BitData*		dest,
	uint64_t			deststart,
	const BitData*	src1,
	uint64_t			src1start,
	const BitData*	src2,
	uint64_t			src2start,
	const BitData*	mask,
	uint64_t			maskstart,
	uint64_t			size
)
{
	const bool fieldsOk = _BitDataFieldOk(src1, src1start, size) && _BitDataFieldOk(src2, src2start, size) && _BitDataFieldOk(mask, maskstart, size) && _BitDataFieldOk(dest, deststart, size);
	if (!fieldsOk)
	{
		BD_ASSERTFAIL("Bad param values");
		return;
	}

	uint64_t remaining = size;

	// whole chunks, highest first
	while (remaining >= c_i4ChunkSize)
	{
		remaining -= c_i4ChunkSize;
		const UIntBase whenClear = GetFullChunk(src1, src1start + remaining);
		const UIntBase whenSet = GetFullChunk(src2, src2start + remaining);
		const UIntBase selector = GetFullChunk(mask, maskstart + remaining);
		SetFullChunk(dest, deststart + remaining, (whenClear & ~selector) | (whenSet & selector));
	}

	// low bits that do not fill a chunk
	if (remaining != 0)
	{
		const UIntBase whenClear = GetPartialChunk(src1, src1start, remaining);
		const UIntBase whenSet = GetPartialChunk(src2, src2start, remaining);
		const UIntBase selector = GetPartialChunk(mask, maskstart, remaining);
		SetPartialChunk(dest, deststart, (uint32_t)remaining, (whenClear & ~selector) | (whenSet & selector));
	}
}

/// @brief Tests whether two BitData objects have the same size and contents.
///
/// The whole bytes of the two buffers are compared with memcmp; the bits of a
/// trailing partial byte are compared under a mask so that bits beyond
/// bitsize do not influence the result.
///
/// @param bd1        First BitData.
/// @param bd2        Second BitData.
/// @param pidentical Receives true when both objects hold the same number of
///                   bits and all of those bits are equal.
void BitData_IsIdentical(
	const BitData*	bd1,
	const BitData*	bd2,
	bool* pidentical
)
{
	BD_ASSERT((bd1 != 0) && (bd2 != 0) && (pidentical != 0), "Bad param value");

	bool identical = (bd1->bitsize == bd2->bitsize);
	if (identical)
	{
		const uint64_t wholeBytes = bd1->bitsize>>3;
		const uint32_t tailBits = bd1->bitsize&7;

		if ((wholeBytes > 0) && (memcmp(bd1->buffer, bd2->buffer, (size_t)wholeBytes) != 0))
		{
			identical = false;
		}
		else if (tailBits != 0)
		{
			// only the valid low bits of the last byte take part
			const int tailMask = (1<<tailBits) - 1;
			const int difference = ((uint8_t*)bd1->buffer)[wholeBytes] ^ ((uint8_t*)bd2->buffer)[wholeBytes];
			identical = ((difference & tailMask) == 0);
		}
	}

	*pidentical = identical;
}

/// @brief Formats a bit field as a binary string: "0b" followed by one
///        character per bit, most significant bit first.
///
/// @param bd         BitData that contains the field.
/// @param start      Bit offset of the first bit of the field.
/// @param size       Number of bits in the field.
/// @param strbuffer  Receives the zero-terminated string.
/// @param buffersize Capacity of @p strbuffer in characters; at least
///                   (size + 3) is required for "0b", the digits and the
///                   terminator.
void BitData_ToBinary(
	const BitData*	bd,
	uint64_t		start,
	uint64_t		size,
	char*	    	strbuffer,
	uint32_t		buffersize
)
{
	if (!_BitDataFieldOk(bd, start, size))
	{
		BD_ASSERTFAIL("Bad param values");
		return;
	}

	// character array needed for string is bitsize + 3 for '0b' and zero termination
	if (buffersize < size + 3)
	{
		BD_ASSERTFAIL("Bad param values");
		return;
	}

	int64_t remaining = size;
	char* out = strbuffer;
	*out++ = '0';
	*out++ = 'b';

	while (remaining != 0)
	{
		int bitsLeft;
		UIntBase chunk;

		if (remaining < c_i4ChunkSize)
		{
			// lowest, incomplete chunk of the field
			bitsLeft = (int)remaining;
			chunk = GetPartialChunk(bd, start, remaining);
			remaining = 0;
		}
		else
		{
			// topmost chunk that has not been printed yet
			remaining -= c_i4ChunkSize;
			chunk = GetFullChunk(bd, start + remaining);
			bitsLeft = c_i4ChunkSize;
		}

		// take care of whole nibbles en masse: each table entry holds the
		// four characters of one nibble
		for (; bitsLeft >= 4; out += 4)
		{
			bitsLeft -= 4;
			*((uint32_t*)out) = bitdigits[(chunk >> bitsLeft) & 0xf];
		}

		// take care of remnants, one character per bit
		for (int bit = bitsLeft - 1; bit >= 0; --bit)
		{
			*out++ = (char)(((int)(chunk >> bit) & 0x1) + '0');
		}
	}

	*out = 0;
}

/// @brief Formats a bit field as a hexadecimal string: "0x" followed by one
///        digit per started nibble, most significant digit first.
///
/// @param bd         BitData that contains the field.
/// @param start      Bit offset of the first bit of the field.
/// @param size       Number of bits in the field.
/// @param strbuffer  Receives the zero-terminated string.
/// @param buffersize Capacity of @p strbuffer in characters; at least
///                   ceil(size / 4) + 3 is required for "0x", the digits and
///                   the terminator.
void BitData_ToHex(
	const BitData*	bd,
	uint64_t		start,
	uint64_t		size,
	char*	    	strbuffer,
	uint32_t		buffersize
)
{
	if (_BitDataFieldOk(bd, start, size))
	{
        int64_t count = size;

		// character array needed for string is the number of hex digits needed + 3 for '0x' and zero termination
		// (the 3 extra characters belong on the "needed" side of the
		// comparison; adding them to the capacity instead accepts buffers that
		// are up to 6 characters too small and overruns them)
		const uint64_t charsNeeded = ((size + 3) / 4) + 3;
		if (charsNeeded <= buffersize)
		{
			char* char_ptr = strbuffer;
			*char_ptr++ = '0';
			*char_ptr++ = 'x';

			while (count != 0)
			{
				int32_t c_nChunkBits;
				UIntBase chunk;

				// the topmost piece may be an incomplete chunk; everything
				// below it is chunk aligned relative to 'start'
				if ((count&c_o4ChunkMask) != 0)
				{
					c_nChunkBits = (int)(count & c_o4ChunkMask);
					chunk = GetPartialChunk(bd, (count - c_nChunkBits) + start, c_nChunkBits);
				}
				else
				{
					c_nChunkBits = c_i4ChunkSize;
					chunk = GetFullChunk(bd, (count - c_nChunkBits) + start);
				}

				count -= c_nChunkBits;

				// start at the shift of the highest started nibble and emit
				// one digit per nibble down to bit 0
                for (c_nChunkBits = (c_nChunkBits - 1) & 0x3c; c_nChunkBits >= 0; c_nChunkBits -= 4)
                {
					*char_ptr++ = cstrHexDigits[(chunk >> c_nChunkBits) & 0xf];
                }
			}

			*char_ptr = 0;
		}
		else
		{
			BD_ASSERTFAIL("Bad param values");
		}
	}
	else
	{
		BD_ASSERTFAIL("Bad param values");
	}
}

/// @brief Formats a bit field as an unsigned decimal string.
///
/// Fields of up to 64 significant bits are formatted directly.  Wider values
/// are copied into a scratch BitData and repeatedly divided by 10^9; each
/// remainder is formatted as nine zero-padded digits and inserted in front of
/// the text produced so far, and the remaining leading part (64 bits or less)
/// is formatted without padding.
///
/// @param bd         BitData that contains the field.
/// @param start      Bit offset of the first bit of the field.
/// @param size       Number of bits in the field.
/// @param strbuffer  Receives the zero-terminated string.
/// @param buffersize Capacity of @p strbuffer in characters.
///
/// NOTE(review): the zero-size path and the wide path write to strbuffer[0]
/// (and [1]) without looking at buffersize -- callers presumably always pass
/// a buffer of at least two characters; confirm.
void BitData_ToDecimal(
	const BitData*	bd,
	uint64_t		start,
	uint64_t		size,
	char*		strbuffer,
	uint32_t		buffersize
)
{
	if (_BitDataFieldOk(bd, start, size))
	{
		uint64_t o4NumBits = size;
 		// get the most significant digit that is set -- this is the longest we will convert
		// (presumably BitData_FindLast yields the index of the highest set bit,
		// so +1 turns it into a bit count -- verify against its definition)
		if (o4NumBits > 64)
		{
			BitData_FindLast(bd, start, o4NumBits, &o4NumBits);
			o4NumBits++;
		}

		if (o4NumBits == 0)
		{
			// nothing to convert: the value is zero
			strbuffer[0] = '0';
			strbuffer[1] = 0;
		}
		else if (o4NumBits <= 32)
		{
			uint32_t value = (uint32_t)BitData_GetUpToUInt64(bd, start, (uint32_t)o4NumBits);
            CommonUtils::SafeStringFormat(strbuffer, buffersize, "%" PRIu32, value);
		}
		else if (o4NumBits <= 64)
		{
			uint64_t value = BitData_GetUpToUInt64(bd, start, (uint32_t)o4NumBits);
            CommonUtils::SafeStringFormat(strbuffer, buffersize, "%" PRIu64, value);
		}
		else
		{
			// more than 64 significant bits: work on a private copy that can be
			// divided down in place
			BitData test;
			char* tempBufferptr = 0;
			// use the stack memory for sizes smaller than 256b
			char tempBuffer[32];
			if (o4NumBits <= 256)
			{
				test = BitData_CreateLocalFromBuffer(o4NumBits, o4NumBits, tempBuffer);
			}
			else
			{
				// heap scratch buffer, released at the end of this branch
				tempBufferptr = new char[BitsToUint8s(o4NumBits) + c_o4AlignedPtrMask];
				test = BitData_CreateLocalFromBuffer(o4NumBits, o4NumBits, tempBufferptr);
			}
			BitData_Copy(bd, start, &test, 0, o4NumBits);

			strbuffer[0] = 0;
			char szTemp[32];
			szTemp[0] = 0;
			size_t tempchars = 0;	// characters produced by the current step
			size_t totalchars = 1;	// characters in strbuffer so far, including the terminator

			while (o4NumBits > 0)
			{
				if (o4NumBits <= 32)
				{
					// what is left fits in 32 bits: these are the leading digits
                    int ret = CommonUtils::SafeStringFormat(szTemp, sizeof(szTemp), "%" PRIu32, *(uint32_t*)test.buffer);
                    if (ret >= 0)
                    {
                        tempchars = static_cast<size_t>(ret);
                    }
					o4NumBits = 0;
				}
				else if (o4NumBits <= 64)
				{
					// what is left fits in 64 bits: these are the leading digits
                    int ret = CommonUtils::SafeStringFormat(szTemp, sizeof(szTemp), "%" PRIu64, *(uint64_t*)test.buffer);
                    if (ret >= 0)
                    {
                        tempchars = static_cast<size_t>(ret);
                    }
                    o4NumBits = 0;
				}
				else
				{
					// split off the next nine decimal digits; presumably
					// _DivideByUInt32 divides 'test' in place and returns the
					// remainder -- verify against its definition
					// NOTE(review): unlike the branches above, a negative
					// SafeStringFormat result is not filtered here before it is
					// stored in the unsigned tempchars
                    tempchars = CommonUtils::SafeStringFormat(szTemp, sizeof(szTemp), "%09" PRIu32, _DivideByUInt32(&test, 1000000000));
					BitData_FindLast(&test, 0, o4NumBits, &o4NumBits);
					o4NumBits++;
				}

				// output buffer exhausted: clamp this step to the room that is
				// left and stop converting
				if (tempchars + totalchars > (size_t)buffersize)
				{
					o4NumBits = 0;
					tempchars = buffersize - totalchars;
				}

				// make room in front of the existing text (terminator included)
				// and put the new, more significant digits there
				memmove(strbuffer + tempchars, strbuffer, totalchars);
                CommonUtils::MemoryCopy(strbuffer, buffersize, szTemp, tempchars);
				totalchars += tempchars;
			}

			if (tempBufferptr != 0)
			{
				delete [] tempBufferptr;
			}
		}
	}
	else
	{
		BD_ASSERTFAIL("Bad param values");
	}
}

/// @brief Produces a display string for a bit field.
///
/// The result is "null" for a null BitData, "empty" for a BitData without a
/// buffer or without bits, and otherwise "[<size>b] " followed by the
/// hexadecimal rendering of the field produced by BitData_ToHex.  Nothing is
/// written when @p buffersize is 0.
///
/// @param bd         BitData that contains the field; may be null.
/// @param start      Bit offset of the first bit of the field.
/// @param size       Number of bits in the field.
/// @param strbuffer  Receives the zero-terminated string.
/// @param buffersize Capacity of @p strbuffer in characters.
void BitData_ToString(
	const BitData*	bd,
	uint64_t		start,
	uint64_t		size,
	char*		strbuffer,
	uint32_t		buffersize
)
{
	if (buffersize == 0)
	{
		return;
	}

	strbuffer[0] = 0;

	if (bd == 0)
	{
        CommonUtils::SafeStringCopy(strbuffer, buffersize, "null");
		return;
	}

	if ((bd->buffer == 0) || (bd->bitsize == 0))
	{
        CommonUtils::SafeStringCopy(strbuffer, buffersize, "empty");
		return;
	}

	// size prefix first; the hex digits are appended right behind it
	const auto prefixLength = CommonUtils::SafeStringFormat(strbuffer, buffersize, "[%" PRIu64 "b] ", size);

	if (prefixLength >= 0)
	{
		BitData_ToHex(bd, start, size, strbuffer + prefixLength, buffersize - static_cast<uint32_t>(prefixLength));
	}
	else if (prefixLength == -1) // -1 means that SafeStringFormat() had to truncate the string that was intended to go in the buffer.
	{
		BD_ASSERTFAIL("The buffer is not large enough.");
	}
	else
	{
		BD_ASSERTFAIL("Failed to encode the bitdata size into the string");
	}
}

void BitData_FromString(
	BitData*	bd,
	uint64_t		start,
	uint64_t	    numBitsToSet,
	const char*	strbuffer
)
{
    // Skip whitespace
    while (isspace(*strbuffer))
    {
        ++strbuffer;
    }

	if (_BitDataFieldOk(bd, start, numBitsToSet))
	{
        size_t stringLength = strlen(strbuffer);
        uint64_t numBitsToZero = 0;

        if ((stringLength > 2) && ('0' == strbuffer[0]) && ('x' == strbuffer[1]))
        {
            // find the end of the hex number
            size_t lastHexDigit = 2;
            char chTemp;
            while ((lastHexDigit < stringLength) &&
                    ((((chTemp = strbuffer[lastHexDigit]) >= '0') && (chTemp <= '9'))
                    || ((chTemp >= 'a') && (chTemp <= 'f'))
                    || ((chTemp >= 'A') && (chTemp <= 'F')))
            )
            {
                lastHexDigit++;
            }

            UIntBase chunk = 0;

            // this is the number of digits we'll actually consume, as our destination field allows
            size_t numDigitsToConsume = lastHexDigit - 2;

            if(((lastHexDigit - 2) * 4) > numBitsToSet)
            {
                numDigitsToConsume = static_cast<size_t>((numBitsToSet + 3) / 4);
            }

            size_t numBitsToConsume = numDigitsToConsume * 4; //There are 4 bits for every hex character

            // Number of bits we'll need to zero out in the BitData object if the amount of bits
            // we're supposed to set is larger than the amount of bits provided by the string
            if (numBitsToSet > numBitsToConsume)
            {
                numBitsToZero = numBitsToSet - numBitsToConsume;
            }

            const size_t lastDigitInChunk = (c_i4ChunkSize / 4) - 1;
            const size_t c_oDigitsMask = ~lastDigitInChunk;

            size_t currentDigit = 0;
            for (; currentDigit < numDigitsToConsume; currentDigit++)
            {
				chunk |= ((UIntBase)sc_o4MaskedAsciiToHexVal[strbuffer[--lastHexDigit]&0x1f] << ((currentDigit & 0xf) << 2));

                if ((currentDigit & lastDigitInChunk) == lastDigitInChunk)
                {
                    // We have a full chunk. Time to write it
                    uint64_t currentBitDataBitPosition = start + ((currentDigit & c_oDigitsMask) << 2);
                    if (((currentDigit + 1) == numDigitsToConsume) && (((currentDigit << 2) + 3) >= numBitsToSet))
                    {
                        //This chunk is bigger than the amount of bits we can still set. We should construct a partial chunk
                        SetPartialChunk(bd, currentBitDataBitPosition, static_cast<uint32_t>(numBitsToSet & c_o4ChunkMask), chunk);
                    }
                    else
                    {
                        SetFullChunk(bd, currentBitDataBitPosition, chunk);
                        chunk = 0;
                    }
                }
            }

            // if we haven't finished our last DWord write, do it now
            if ((currentDigit & lastDigitInChunk) != 0)
            {
                size_t chunkBits = (currentDigit & lastDigitInChunk) << 2;
                if ((currentDigit << 2) > numBitsToSet)
                {
                    chunkBits = (numBitsToSet & c_o4ChunkMask);
                }

                uint64_t currentBitDataBitPosition = start + ((currentDigit & c_oDigitsMask) << 2);
                SetPartialChunk(bd, currentBitDataBitPosition, static_cast<uint32_t>(chunkBits), chunk);
            }
        }
        else if ((stringLength > 2) && ('0' == strbuffer[0]) && ('b' == strbuffer[1]))
        {
            // find the end of the binary number
            size_t lastBinaryNumber = 2;
            char chTemp;
            while ((lastBinaryNumber < stringLength) && (((chTemp = strbuffer[lastBinaryNumber]) == '0') || (chTemp == '1')))
            {
                lastBinaryNumber++;
            }

            // the number of digits we'll actually need to consume from the string
            size_t numDigitsToConsume = ((lastBinaryNumber - 2) > numBitsToSet) ? static_cast<size_t>(numBitsToSet) : (lastBinaryNumber - 2);

            // number of bits we'll need to zero out because the field size is larger than the binary text
            numBitsToZero = numBitsToSet - numDigitsToConsume;

            UIntBase chunk = 0;

            const size_t lastDigitInChunk = c_i4ChunkSize - 1;
            const size_t c_oDigitsMask = ~lastDigitInChunk;

            size_t currentDigit = 0;
            for (; currentDigit < numDigitsToConsume; currentDigit++)
            {
                if (strbuffer[--lastBinaryNumber] == '1')
                {
                    chunk |= c_iOne << (currentDigit & c_o4ChunkMask);
                }

                if ((currentDigit & lastDigitInChunk) == lastDigitInChunk)
                {
                    SetFullChunk(bd, start + (currentDigit & c_oDigitsMask), chunk);
                    chunk = 0;
                }
            }

            // if we haven't finished our last DWord write, do it now
            if ((currentDigit & c_o4ChunkMask) != 0)
            {
				SetPartialChunk(bd, start + (currentDigit & c_oDigitsMask), static_cast<uint32_t>(currentDigit & c_o4ChunkMask), chunk);
            }
        }
		else if ((stringLength > 0) && (strbuffer[0] >= '0') && (strbuffer[0] <= '9'))
		{
            // It's a decimal integer
            size_t lastNumber = 1;
            char chTemp;
            while ((lastNumber < stringLength) && (((chTemp = strbuffer[lastNumber]) >= '0') && (chTemp <= '9')))
            {
                lastNumber++;
            }

            // Figure out how many bits will be required to store that integral value
            size_t numBitsRequired = 1 + static_cast<size_t>(ceil(LOG_2_10 * lastNumber));
            size_t numBitsToConsume = (numBitsToSet > numBitsRequired) ? numBitsRequired : static_cast<size_t>(numBitsToSet);
			if (numBitsToConsume <= 64)
			{
				uint64_t value = 0;
#if defined(HOST_WINDOWS)
				int i = sscanf_s(strbuffer, "%" PRIu64, &value);
#elif defined(HOST_LINUX) || defined(HOST_DARWIN)
				int i = sscanf(strbuffer, "%" PRIu64, &value);
#endif
				//sscanf returns the number of items in the argument list that it successfully filled
                //Since we only have 1 argument, a return of "1" means success
				if (i == 1)
				{
					BitData_SetUpToUInt64(bd, start, static_cast<uint32_t>(numBitsToConsume), value);
				}
			}
			else
			{
				size_t numBytesToConsume = BitsToUint8s(numBitsToConsume);
				BitData cbdResult;
				char* tempBufferptr = nullptr;
				// use the stack memory for sizes smaller than 32B
				char tempBuffer[32];
				if (numBytesToConsume <= sizeof(tempBuffer))
				{
					cbdResult = BitData_CreateLocalFromBuffer(numBitsToConsume, numBitsToConsume, tempBuffer);
				}
				else
				{
					tempBufferptr = new char[numBytesToConsume + c_o4AlignedPtrMask];
					cbdResult = BitData_CreateLocalFromBuffer(numBitsToConsume, numBitsToConsume, tempBufferptr);
				}

				memset(cbdResult.buffer, 0, numBytesToConsume);

                uint32_t currentDWord = 0;
                size_t currentDigit = 0;
                for (currentDigit = 0; currentDigit < lastNumber; currentDigit++)
                {
                    currentDWord = (currentDWord * 10) + static_cast<uint32_t>(strbuffer[currentDigit] - '0');
                    if ((currentDigit % 9) == 8)
                    {
                        //We have a full DWORD. Time to write it down
                        if (currentDigit > 8)
                        {
                            _MultiplyByUInt32(&cbdResult, 1000000000);
                        }

						_AddUInt32(&cbdResult, currentDWord);
                        currentDWord = 0;
                    }
                }

                if ((currentDigit % 9) != 0)
                {
                    //We have some leftover stuff to write out
                    if (currentDigit > 8)
                    {
						_MultiplyByUInt32(&cbdResult, sc_ao4Multiplier[currentDigit % 9]);
                    }

					_AddUInt32(&cbdResult, currentDWord);
                }

                BitData_Copy(&cbdResult, 0, bd, start, numBitsToConsume);

                if (numBitsToSet > numBitsToConsume)
                {
                    numBitsToZero = numBitsToSet - numBitsToConsume;
                }

				if (tempBufferptr != nullptr)
				{
					delete [] tempBufferptr;
				}
			}
		}

		if (numBitsToZero > 0)
		{
			BitData_InitValue(bd, start + (numBitsToSet - numBitsToZero), numBitsToZero, 0);
		}
	}
	else
	{
		BD_ASSERTFAIL("Bad param values");
	}
}

/// @brief Inserts up to 64 bits of @p value into @p bd at bit offset @p start,
///        growing the BitData by @p size bits.
///
/// @param bd     BitData to modify; must not be null.
/// @param start  Bit offset of the insertion point; must not exceed the current bit size.
/// @param size   Number of bits to insert; must be 64 or fewer.
/// @param value  Value handed to BitData_SetUpToUInt64 for the inserted field.
void BitData_InsertUint64(
	BitData*	bd,
	uint64_t	start,
	uint64_t	size,
	uint64_t	value
	)
{
	BD_ASSERT(bd != nullptr, "The BitData pointer passed in is null.");
	BD_ASSERT(start <= bd->bitsize, "The start of the insert is beyond the bounds of the BitData.");
	BD_ASSERT(size  <= 64, "The insert size needs to be less than 65 bits.");

	// Decide whether this is a pure append before the size changes underneath us.
	const uint64_t sizeBeforeInsert = bd->bitsize;
	const bool insertingAtEnd = (start == sizeBeforeInsert);

	BitData_Resize(bd, sizeBeforeInsert + size);

	if (!insertingAtEnd)
	{
		// Existing data at and above the insertion point is displaced towards
		// the MSB end to make room for the new field.
		const uint64_t bitsFromStart = bd->bitsize - start;
		BitData_ShiftLeft(bd, start, bitsFromStart, size);
	}

	BitData_SetUpToUInt64(bd, start, static_cast<uint32_t>(size), value);
}

/// @brief Inserts @p size bits taken from @p bdSrc (starting at @p startSrc)
///        into @p bd at bit offset @p start, growing @p bd by @p size bits.
///
/// @param bd        BitData to modify; must not be null.
/// @param start     Bit offset of the insertion point; must not exceed the current bit size.
/// @param bdSrc     BitData supplying the inserted bits.
/// @param startSrc  Bit offset of the first source bit.
/// @param size      Number of bits to insert.
///
/// The source field is validated with _BitDataFieldOk; on failure the function
/// reports "Bad param values" through BD_ASSERTFAIL and leaves @p bd untouched.
void BitData_InsertBitData(
	BitData*		bd,
	uint64_t		start,
	const BitData*	bdSrc,
	uint64_t		startSrc,
	uint64_t		size
	)
{
	BD_ASSERT(bd != nullptr, "The BitData pointer passed in is null.");
	BD_ASSERT(start <= bd->bitsize, "The start of the insert is beyond the bounds of the BitData.");

	if (!_BitDataFieldOk(bdSrc, startSrc, size))
	{
		BD_ASSERTFAIL("Bad param values");
		return;
	}

	// Decide whether this is a pure append before the size changes underneath us.
	const uint64_t sizeBeforeInsert = bd->bitsize;
	const bool insertingAtEnd = (start == sizeBeforeInsert);

	BitData_Resize(bd, sizeBeforeInsert + size);

	if (!insertingAtEnd)
	{
		// Existing data at and above the insertion point is displaced towards
		// the MSB end to make room for the new field.
		const uint64_t bitsFromStart = bd->bitsize - start;
		BitData_ShiftLeft(bd, start, bitsFromStart, size);
	}

	BitData_Copy(bdSrc, startSrc, bd, start, size);
}

/// @brief Appends every bit of @p bdSrc to the end of @p bd.
///
/// @param bd     BitData to grow; must not be null.
/// @param bdSrc  BitData whose contents are appended; must not be null.
///               May be the same object as @p bd.
void BitData_Append(
	BitData*		bd,
	const BitData*	bdSrc
	)
{
	BD_ASSERT(bd != nullptr, "The BitData pointer passed in is null.");
	BD_ASSERT(bdSrc != nullptr, "The source BitData pointer passed in is null.");

	// Snapshot both sizes BEFORE resizing.  When bd and bdSrc are the same
	// object, the resize changes bdSrc->bitsize as well; re-reading it
	// afterwards would request a copy twice as long as the data available.
	const uint64_t oldsize = bd->bitsize;
	const uint64_t appendSize = bdSrc->bitsize;

	BitData_Resize(bd, oldsize + appendSize);
	BitData_Copy(bdSrc, 0, bd, oldsize, appendSize);
}


/// @brief Removes @p size bits starting at bit offset @p start from @p bd and
///        shrinks the BitData accordingly.
///
/// @param bd     BitData to modify.
/// @param start  Bit offset of the first bit to remove.
/// @param size   Number of bits to remove.
///
/// The field is validated with _BitDataFieldOk; on failure the function
/// reports "Bad param values" through BD_ASSERTFAIL and changes nothing.
void BitData_Delete(
	BitData*	bd,
	uint64_t		start,
	uint64_t		size
	)
{
	if (!_BitDataFieldOk(bd, start, size))
	{
		BD_ASSERTFAIL("Bad param values");
		return;
	}

	// Only shift when there is data above the deleted field; otherwise the
	// field is at the very top and truncating is enough.
	const uint64_t endOfField = start + size;
	if (endOfField < bd->bitsize)
	{
		const uint64_t bitsFromStart = bd->bitsize - start;
		BitData_ShiftRight(bd, start, bitsFromStart, size);
	}

	const uint64_t shrunkSize = bd->bitsize - size;
	BitData_Resize(bd, shrunkSize);
}

/// @brief Changes the bit size of @p bd to @p size.
///
/// @param bd    BitData to resize.
/// @param size  New size in bits.
///
/// When @p bd has no backing object (obj is null) the size may only change
/// within the existing bitcapacity; a larger request is reported through
/// BD_ASSERTFAIL and the BitData is left unchanged.  Otherwise the backing
/// std::vector<UIntBase> is resized to hold enough chunks, and bitcapacity and
/// buffer are refreshed from the vector.
void BitData_Resize(
	BitData*	bd,
	uint64_t		size
	)
{
	if (bd->obj == nullptr)
	{
		// No vector behind this BitData: the capacity is fixed.
		if (size > bd->bitcapacity)
		{
			BD_ASSERTFAIL("Tried to resize beyond the static buffer capacity");
		}
		else
		{
			bd->bitsize = size;
		}
		return;
	}

	bd->bitsize = size;

	std::vector<UIntBase>* const chunks = (std::vector<UIntBase>*)bd->obj;

	// Round the bit count up to a whole number of chunks.
	chunks->resize(static_cast<uint32_t>((size + c_i4ChunkSize - 1)/c_i4ChunkSize));

	// The vector may have reallocated, so both the capacity and the raw
	// buffer pointer have to be re-read.
	bd->bitcapacity = chunks->capacity()*c_i4ChunkSize;
	bd->buffer = (bd->bitcapacity > 0) ? chunks->data() : nullptr;
}

/// @brief Builds a BitData value that refers to caller-supplied storage.
///
/// @param size      Bit size recorded in the returned BitData.
/// @param capacity  Bit capacity recorded in the returned BitData.
/// @param buffer    Storage pointer recorded in the returned BitData; it is
///                  stored as-is and not copied.
///
/// @return A BitData with a null obj, which BitData_Resize and
///         BitData_FreeManaged treat as having no backing vector.
BitData BitData_CreateLocalFromBuffer(
	uint64_t size,
	uint64_t capacity,
	void* buffer
)
{
	BitData localView;
	localView.buffer = buffer;
	localView.bitcapacity = capacity;
	localView.bitsize = size;
	localView.obj = nullptr;
	return localView;
}

/// @brief Allocates a managed BitData of @p size bits whose first 64-bit word
///        is set to @p value.
///
/// @param size   Bit size of the new BitData.
/// @param value  Value stored into the first 64 bits of the buffer.
///
/// @return Pointer to the newly allocated BitData.
BitData* BitData_CreateManagedFromUInt64(
	uint64_t size,
	uint64_t value
)
{
	BitData* const created = new BitData();

	// The buffer always receives a full 64-bit store below, so the capacity
	// is never allowed to drop under 64 bits; wider requests get room for
	// all of their data.
	const uint64_t capacity = (size > 64) ? size : static_cast<uint64_t>(64);
	_BDV_Create(created, size, capacity);

	const bool widerThanOneWord = (size > 64);
	if (widerThanOneWord)
	{
		// Clear everything first; the store below only covers the low 64 bits.
		memset(created->buffer, 0, BitsToUint8s(size));
	}

	*((uint64_t*)created->buffer) = value;
	return created;
}

/// @brief Allocates a managed BitData of @p size bits and fills it with a copy
///        of the bytes at @p buffer.
///
/// @param size    Bit size (and requested capacity) of the new BitData.
/// @param buffer  Source bytes; BitsToUint8s(size) bytes are read from it.
///                Not touched when @p size is zero.
///
/// @return Pointer to the newly allocated BitData.
BitData* BitData_CreateManagedFromBuffer(
    uint64_t size,
    const void* buffer
)
{
    BitData* const created = new BitData();
    _BDV_Create(created, size, size);

    // Nothing to copy for an empty BitData.
    if (size == 0)
    {
        return created;
    }

    const size_t byteCount = BitsToUint8s(size);
    memmove(created->buffer, buffer, byteCount);
    return created;
}

/// @brief Allocates a managed BitData and initialises it from a text value.
///
/// Two input forms are accepted:
///   - "[<N>b]<value>": an explicit bit size prefix followed by the value
///     text.  N is clamped to 2^32 - 1.
///   - "<value>": no usable prefix; the bit size is taken from
///     _StringValueSize.
///
/// A prefix that is malformed (the size could not be scanned, or no closing
/// ']' exists) is not trusted; the whole string is then handled as the second
/// form.
///
/// @param buffer  Null-terminated text to parse; must not be null.
///
/// @return Pointer to the newly allocated BitData.
BitData* BitData_CreateManagedFromString(
	const char* buffer
)
{
	BD_ASSERT(buffer != nullptr, "The string pointer passed in is null.");

	BitData* bd = new BitData();

	// Zero-initialised so a failed scan never leaves an indeterminate value.
	uint64_t size = 0;
#if defined(HOST_WINDOWS)
	int i = sscanf_s(buffer,"[%" PRIu64 "b]", &size);
#elif defined(HOST_LINUX) || defined(HOST_DARWIN)
    int i = sscanf(buffer,"[%" PRIu64 "b]", &size);
#endif

	// sscanf reports the number of converted items, or EOF (negative) when the
	// input ends before the first conversion.  Only exactly one conversion
	// means a size was read.  The scan also succeeds when the trailing "b]" is
	// absent, so the closing bracket has to be located explicitly.
	const char* closingBracket = (i == 1) ? strchr(buffer, ']') : nullptr;

	if (closingBracket != nullptr)
	{
		if (size >= (static_cast<uint64_t>(1)<<32))
		{
			size = (static_cast<uint64_t>(1)<<32)-1;
		}

		_BDV_Create(bd, size, size);
		const char* strValue = closingBracket + 1;
		BitData_FromString(bd, 0, size, strValue);
	}
	else
	{
		size = _StringValueSize(buffer);
		_BDV_Create(bd, size, size);
		BitData_FromString(bd, 0, size, buffer);
	}
	return bd;
}

/// @brief Allocates a managed BitData of @p size bits.
///
/// @param size  Bit size (and requested capacity) passed to _BDV_Create.
///
/// @return Pointer to the newly allocated BitData.  This function does not
///         write to the buffer contents itself.
BitData* BitData_CreateManaged(
	uint64_t size
)
{
	BitData* const created = new BitData();
	const uint64_t capacity = size;
	_BDV_Create(created, size, capacity);
	return created;
}

/// @brief Allocates a managed BitData that is a copy of @p src.
///
/// @param src  BitData to copy; must not be null.  Its bit size is used for
///             both the size and the requested capacity of the copy.
///
/// @return Pointer to the newly allocated BitData.
BitData* BitData_CreateManagedFromBitData(
	const BitData*	src
)
{
	BD_ASSERT(src != nullptr, "The BitData pointer passed in is null.");

	BitData* bd = new BitData();
	_BDV_Create(bd, src->bitsize, src->bitsize);

	// An empty BitData may have a null buffer, and handing null pointers to
	// memmove is undefined even for a zero length.  Skip the copy in that
	// case, as BitData_CreateManagedFromBuffer does.
	if (src->bitsize != 0)
	{
		memmove(bd->buffer, src->buffer, BitsToUint8s(src->bitsize));
	}
	return bd;
}

/// @brief Releases a BitData that owns a backing std::vector<UIntBase>,
///        including the BitData object itself.
///
/// @param bd  BitData to free; must not be null.  After a successful call the
///            pointer is dangling and must not be used.
///
/// A BitData whose obj is null is not freed; the misuse is reported through
/// BD_ASSERTFAIL instead.
void BitData_FreeManaged(
	BitData* bd
)
{
	BD_ASSERT(bd != nullptr, "The BitData pointer passed in is null.");

	if (bd->obj == nullptr)
	{
		BD_ASSERTFAIL("FreeManaged called on a bad BitData or the wrong BitData type (i.e., local/unmanaged)");
		return;
	}

	// Release the backing vector, then scrub every field before the BitData
	// object itself is destroyed.
	delete (std::vector<UIntBase>*)(bd->obj);
	bd->obj = nullptr;
	bd->bitcapacity = 0;
	bd->bitsize = 0;
	bd->buffer = nullptr;

	delete bd;
}
