HomeForumSourceResearchGuide
Sign in to contribute to source. how it works
Native library OpenCLLib by ben
expand copy to clipboardexpand
//Written by Ben Craine, 2023

/*
 * IMPORTANT OPENCL DEFINITIONS
 * Platform - An opencl implimentation, e.g CUDA, Intel Iris.
 * Context - A set of devices (belonging to the same platform), Command Queues, Kernels and Memory objects
 * Device - A physical compute device available to opencl
 * Command Queue - A queue that points to a single device, requests to execute operations on a device are sent through a command queue that points to that device
 * Kernels - A compiled, executable program associated with a set of parameters (Memory objects), built for a specific device/platform
 * Memory objects - The opencl abstraction of a reserved part of memory on a device that can be read/written or passed as a kernel parameter using opencl API calls
 */

//#define CL_TARGET_OPENCL_VERSION 100

//std lib headers
#include 
#include 
#include 
#include 
#include 

//openCL headers
#include 
#include 

//dana headers
#include "dana_api_1.7/dana_lib_defs.h"
#include "dana_api_1.7/dana_types.h"
#include "dana_api_1.7/nli_util.h"
#include "dana_api_1.7/vmi_util.h"

#define FLOAT 0
#define UINT 1

#define MAX_PLATFORMS 100
#define MAX_DEVICES 100

static CoreAPI *api;

static GlobalTypeLink* intArrayGT = NULL;
static GlobalTypeLink* charArrayGT = NULL;
static GlobalTypeLink* stringArrayGT = NULL;
static GlobalTypeLink* stringItemGT = NULL;
static GlobalTypeLink* intMatrixGT = NULL;
static GlobalTypeLink* decMatrixGT = NULL;
static GlobalTypeLink* decArrayGT = NULL;

static bool initOK = false;

/*
 * these global variables are set in init()
 * and MUST NOT be changed once the above
 * flag is set true or in any function other
 * than init().
 */
static cl_platform_id* globalClPlatforms;
static cl_uint numofPlatforms;
static cl_uint* numOfDevicesPerPlatform;
static cl_device_id** devices;

/* 
 * These structures are used to
 * keep state relating to a given
 * instance of a dana dana component
 * that requires the use of this library
 */

typedef struct _context_list_item {
    cl_context context;
    cl_platform_id platform;
    cl_device_id* devices;
    uint8_t numOfDevices;
    struct _context_list_item* next;
} CONTEXT_LI;

typedef struct _one_per_dana_comp {
    CONTEXT_LI* contexts;
} DANA_COMP;

void destroyContexts(DANA_COMP* contextSpace) {
    if (contextSpace == NULL || contextSpace->contexts == NULL) {
        return;
    }

    CONTEXT_LI* probe = contextSpace->contexts;
    for (; probe->next != NULL; probe = probe->next) {
        clReleaseContext(probe->context);
        for (int i = 0; i < probe->numOfDevices; i++) {
            clReleaseDevice(*(probe->devices+i));
        }
    }
    clReleaseContext(probe->context);
    for (int i = 0; i < probe->numOfDevices; i++) {
        clReleaseDevice(*(probe->devices+i));
    }

    free(contextSpace);
    return;
}

void addNewContext(DANA_COMP* danaComp, CONTEXT_LI* newContext) {
    if (danaComp->contexts == NULL) {
        danaComp->contexts = newContext;
        return;
    }

    CONTEXT_LI* probe = danaComp->contexts;
    for (; probe->next != NULL; probe = probe->next) {
    }
    probe->next = newContext;
}

int getNumOfContexts(DANA_COMP* danaComp) {
    CONTEXT_LI* probe = danaComp->contexts;
    if (danaComp->contexts == NULL) {
        return 0;
    }
    int count = 0;
    for (; probe->next != NULL; probe = probe->next) {
        count++;
    }
    count++;
    return count;
}

CONTEXT_LI* getContextByIndex(DANA_COMP* danaComp, int n) {
    CONTEXT_LI* probe = danaComp->contexts;
    if (danaComp->contexts == NULL) {
        return NULL;
    }
    int count = 0;
    for (; probe->next != NULL; probe = probe->next) {
        if (count == n) {
            return probe;
        }
        count++;
    }
    if (count == n) {
        return probe;
    }
    return NULL;
}

CONTEXT_LI* getContextByDevice(DANA_COMP* danaComp, cl_device_id device) {
    CONTEXT_LI* contextProbe = danaComp->contexts;
    if (contextProbe == NULL) {
        return NULL;
    }

    for (; contextProbe->next != NULL; contextProbe = contextProbe->next) {
        for (int i = 0; i < contextProbe->numOfDevices; i++) {
            if (contextProbe->devices[i] == device) {
                return contextProbe;
            }
        }
    }
    for (int i = 0; i < contextProbe->numOfDevices; i++) {
        if (contextProbe->devices[i] == device) {
            return contextProbe;
        }
    }

    return NULL;
}

typedef struct _log_list_item {
    char* APIFunctionCall;
    int errorCode;
    struct _log_list_item* next;
}LOG_LI;

LOG_LI* logListHead = NULL;
LOG_LI* logListEnd = NULL;

LOG_LI* newLogItem(char* func, int err) {
    LOG_LI* newLog = (LOG_LI*) malloc(sizeof(LOG_LI));
    newLog->APIFunctionCall = func;
    newLog->errorCode = err;
    newLog->next = NULL;
    return newLog;
}

void addLog(LOG_LI* adding) {
    if (logListHead == NULL) {
        logListHead = adding;
        logListEnd = adding;
        return;
    }

    logListEnd->next = adding; 
    logListEnd = adding;
    return;
}

INSTRUCTION_DEF printLogs(FrameData *cframe) {
    LOG_LI* probe = logListHead;
    if (probe == NULL) {
        return RETURN_OK;
    }
    for (; probe->next != NULL; probe = probe->next) {
        printf("%s | %d\n", probe->APIFunctionCall, probe->errorCode);
    }
    printf("%s | %d\n", probe->APIFunctionCall, probe->errorCode);
    return RETURN_OK;
}

INSTRUCTION_DEF createContextSpace(FrameData *cframe) {
    if (numofPlatforms == 0)
        {
        api->returnInt(cframe, 0);
        return RETURN_OK;
        }
    
    DANA_COMP* dana_component_id = (DANA_COMP*) malloc(sizeof(DANA_COMP));
    dana_component_id->contexts = NULL;
    api->returnInt(cframe, (size_t) dana_component_id);
    return RETURN_OK;
}

/*
 * This function sets the global variables that
 * relate to opencl and were declared at the top
 * of this file
 * It also creates the per dana component instance
 * state and returns a reference to that state back
 * to the caller
 * Therefore one Dana component instance may create
 * more than one DANA_COMP. This breaks the intended
 * one to one relationship between DANA_COMP and
 * a dana component instance. However the advantage of
 * this desicion is that a dana component can be made
 * that manages calls to this native library on behalf
 * of other components
 */
static void initOpenCL()
    {
    cl_int CL_err = CL_SUCCESS;
    
    globalClPlatforms = (cl_platform_id*) malloc(sizeof(cl_platform_id)*MAX_PLATFORMS);

    CL_err = clGetPlatformIDs( MAX_PLATFORMS, globalClPlatforms, &numofPlatforms );
    if (CL_err != CL_SUCCESS) {
        numofPlatforms = 0;
        return;
    }

    devices = (cl_device_id**) malloc(sizeof(cl_device_id*)*numofPlatforms);
    numOfDevicesPerPlatform = (cl_uint*) malloc(sizeof(cl_uint)*numofPlatforms);
    
    cl_uint returnNumOfDevices = 0;
    for (int i = 0; i < numofPlatforms; i++) {
        *(devices+i) = (cl_device_id*) malloc(sizeof(cl_device_id)*MAX_DEVICES);
        CL_err = clGetDeviceIDs(*(globalClPlatforms+i), CL_DEVICE_TYPE_ALL, MAX_DEVICES, *(devices+i), &returnNumOfDevices);
        if (CL_err != CL_SUCCESS) {
            printf("Error in clGetDeviceIDs: %d\n", CL_err);
        }
        *(numOfDevicesPerPlatform+i) = returnNumOfDevices;
    }

    initOK = true;
    }

/* Returns to the caller all the device IDs
 * available to the system from the global state
 * i.e that which was set in init(). This function
 * DOES NOT return the device IDs which a dana component
 * instance has created a context for.
 */
INSTRUCTION_DEF getComputeDeviceIDs(FrameData *cframe) {
    //cl_int CL_err = CL_SUCCESS;

    if (numofPlatforms == 0) {
        //return empty array
        DanaEl* newArray = api->makeArray(stringArrayGT, 0, NULL);
        api->returnEl(cframe, newArray);
        //exit
        return RETURN_OK;
    }

    //go thru each platform and total the devices for each platform
    int arrSize = 0;
    for (int i = 0; i < numofPlatforms; i++) {
        arrSize += *(numOfDevicesPerPlatform+i);
    }

    //grab each device C handle
    cl_device_id ids[arrSize];
    int seen = 0;
    //for each platform
    for (int i = 0; i < numofPlatforms; i++) {
        //for each device in that platform
        for (int j = 0; j < *(numOfDevicesPerPlatform+i); j++) {
            ids[seen+j] = *(*(devices+i)+j);
        }
        seen += *(numOfDevicesPerPlatform+i);
    }

    //arrange in a dana array
    DanaEl* returnArray = api->makeArray(intArrayGT, arrSize, NULL);

    for (int i = 0; i < arrSize; i++) {
        api->setArrayCellInt(returnArray, i, (size_t) ids[i]);
    }

    //return
    api->returnEl(cframe, returnArray);

    return RETURN_OK;
}

/*
 * Returns the string names of all the devices available
 * to the system. This is taken from the global state, i.e
 * all devices on the system, not just the devices the 
 * dana component instance has access to through a context.
 */
INSTRUCTION_DEF getComputeDevices(FrameData *cframe) {
    cl_int CL_err = CL_SUCCESS;
    if (numofPlatforms == 0) {
        //return empty array
        DanaEl* newArray = api->makeArray(stringArrayGT, 0, NULL);
        api->returnEl(cframe, newArray);
        //exit
        return RETURN_OK;
    }

    //go thru each platform
    int arrSize = 0;
    for (int i = 0; i < numofPlatforms; i++) {
        arrSize += *(numOfDevicesPerPlatform+i);
    }

    //grab each device ID and query opencl for the device name
    char* deviceNames[arrSize];
    int seen = 0;
    //for each platform
    for (int i = 0; i < numofPlatforms; i++) {
        //for each device in that platform
        for (int j = 0; j < *(numOfDevicesPerPlatform+i); j++) {
            char* buf = (char*) malloc(sizeof(char)*500);
            deviceNames[seen+j] = buf;
            size_t bufReturnSize;
            CL_err = clGetDeviceInfo(*(*(devices+i)+j), CL_DEVICE_NAME, sizeof(char)*500, buf, &bufReturnSize);
            *(deviceNames[seen+j]+bufReturnSize) = '\0';
        }
        seen += *(numOfDevicesPerPlatform+i);
    }

    //arrange device names in a dana array
    DanaEl* returnArray = api->makeArray(stringArrayGT, arrSize, NULL);

    for (int i = 0; i < arrSize; i++) {
        DanaEl* string = api->makeData(stringItemGT);

        size_t sublen = strlen(deviceNames[i]);

        unsigned char* cnt = NULL;
        DanaEl* charArr = api->makeArray(charArrayGT, sublen, &cnt);

        memcpy(cnt, deviceNames[i], sublen);

        api->setDataFieldEl(string, 0, charArr);

        api->setArrayCellEl(returnArray, i, string);

        free(deviceNames[i]);
    }

    //return
    api->returnEl(cframe, returnArray);

    return RETURN_OK;
}

/* A context in opencl is a set of physical devices, command queues pointing to
 * those devices (Many Qs to 1 Device), kernels and memory objects
 * All devices in a context must belong to the same platform
 * We aim to abstract away this contraint in Dana
 * This is achieved by assigning a list of contexts to a DANA_COMP
 * Then when a dana component instance makes requests like read/writes to a device
 * we search for the context that device is tied to under their corrisponding
 * DANA_COMP state
 * Return 0 on success, >0 otherwise
 */
INSTRUCTION_DEF createContext(FrameData* cframe) {
    cl_int CL_Err = CL_SUCCESS;

    //input: array of device IDs
    DanaEl* deviceArray = api->getParamEl(cframe, 0);
    cl_device_id deviceHandles[api->getArrayLength(deviceArray)];

    DANA_COMP* danaComp = (DANA_COMP*) api->getParamInt(cframe, 1);

    for (int i = 0; i < api->getArrayLength(deviceArray); i++) {
        deviceHandles[i] = (cl_device_id) api->getArrayCellInt(deviceArray, i);
    }

    //for each platform
    for (int i = 0; i < numofPlatforms; i++) {
        cl_platform_id platform = *(globalClPlatforms+i);
        cl_device_id deviceHandlesForThisPlat[MAX_DEVICES];
        int deviceForPlatCount = 0;

        //find the intersection between the devices IDs provided
        //by the caller and the Device IDs associated with this platform
        //(uses global state)
        for (int j = 0; j < *(numOfDevicesPerPlatform+i); j++) {
            for (int k = 0; k < api->getArrayLength(deviceArray); k++) {
                if (deviceHandles[k] == *(*(devices+i)+j)) {
                    deviceHandlesForThisPlat[deviceForPlatCount] = deviceHandles[k];
                    deviceForPlatCount++;
                }
            }
        }
        
        // if the intersection is the empty set then move onto the next platform
        // else continue into next code block
        if (deviceForPlatCount != 0) {
            CONTEXT_LI* newContextItem = (CONTEXT_LI*) malloc(sizeof(CONTEXT_LI));
            newContextItem->next = NULL;
            newContextItem->platform = platform;

            //strip the deviceHandlesForThisPlat array down to its mimimum required size
            cl_device_id deviceHandlesForThisPlatCut[deviceForPlatCount];
            newContextItem->devices = (cl_device_id*) malloc(sizeof(cl_device_id)*deviceForPlatCount);
            newContextItem->numOfDevices = deviceForPlatCount;
            for(int j = 0; j < deviceForPlatCount; j++) {
                deviceHandlesForThisPlatCut[j] = deviceHandlesForThisPlat[j];
                newContextItem->devices[j] = deviceHandlesForThisPlatCut[j];
            }

            //create the opencl context
            const cl_context_properties props[] = {CL_CONTEXT_PLATFORM, newContextItem->platform, 0};
            newContextItem->context = clCreateContext(props, newContextItem->numOfDevices, newContextItem->devices, NULL, NULL, &CL_Err);
            if (CL_Err != CL_SUCCESS) {
                addLog(newLogItem("clCreateContext", CL_Err));
                api->returnInt(cframe, (size_t) 1);
                return RETURN_OK;
            }

            addNewContext(danaComp, newContextItem);
        }
    }
    api->returnInt(cframe, (size_t) 0);
    return RETURN_OK;
}

INSTRUCTION_DEF createAsynchQueue(FrameData* cframe) {
    cl_int CL_err = CL_SUCCESS;
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_device_id device = (cl_device_id) rawParam; 

    DANA_COMP* danaComp = (DANA_COMP*) api->getParamInt(cframe, 1);
    CONTEXT_LI* contextItem = getContextByDevice(danaComp, device);
    cl_context context = contextItem->context;

    const cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;

    cl_command_queue newQ = clCreateCommandQueue(context, device, props, &CL_err);
    if(CL_err != CL_SUCCESS) {
        addLog(newLogItem("clCreateCommandQueueWithProperties", CL_err));
        api->returnInt(cframe, (size_t) 0);
        return RETURN_OK;
    }

    api->returnInt(cframe, (size_t) newQ);
    return RETURN_OK;
}

INSTRUCTION_DEF createSynchQueue(FrameData* cframe) {
    cl_int CL_err = CL_SUCCESS;
    size_t rawParam = api->getParamInt(cframe, 0);

    cl_device_id device = (cl_device_id) rawParam; 

    DANA_COMP* danaComp = (DANA_COMP*) api->getParamInt(cframe, 1);
    CONTEXT_LI* contextItem = getContextByDevice(danaComp, device);
    cl_context context = contextItem->context;

    cl_command_queue newQ = clCreateCommandQueue(context, device, 0, &CL_err);
    if(CL_err != CL_SUCCESS) {
        addLog(newLogItem("clCreateCommandQueueWithProperties", CL_err));
        api->returnInt(cframe, (size_t) 0);
        return RETURN_OK;
    }

    api->returnInt(cframe, (size_t) newQ);
    return RETURN_OK;
}

INSTRUCTION_DEF createArray(FrameData* cframe) {
    cl_int CL_err = CL_SUCCESS;
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_device_id device = (cl_device_id) rawParam;

    DANA_COMP* danaComp = (DANA_COMP*) api->getParamInt(cframe, 3);
    CONTEXT_LI* contextItem = getContextByDevice(danaComp, device);
    cl_context context = contextItem->context;

    rawParam = api->getParamInt(cframe, 1);
    size_t length = (size_t) rawParam;

    rawParam = api->getParamInt(cframe, 2);
    size_t type = (size_t) rawParam;

    size_t size;
    if (type == FLOAT) {
        size = sizeof(float)*length;
    }
    else if (type == UINT) {
        size = sizeof(size_t)*length;
    }
    else {
        api->returnInt(cframe, (size_t) 0);
        return RETURN_OK;
    }

    cl_mem newArray = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &CL_err);

    if (CL_err != CL_SUCCESS) {
        addLog(newLogItem("clCreateBuffer", CL_err));
        api->returnInt(cframe, (size_t) 0);
        return RETURN_OK;
    }

    api->returnInt(cframe, (size_t) newArray);

    return RETURN_OK;
}

INSTRUCTION_DEF writeIntArray(FrameData* cframe) {
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_command_queue queue = (cl_command_queue) rawParam;
    rawParam = api->getParamInt(cframe, 1);
    cl_mem memObj = (cl_mem) rawParam;

    DanaEl* hostArray = api->getParamEl(cframe, 2);
    size_t hostArrayLen = api->getArrayLength(hostArray);

    size_t* rawHostArray = (size_t*) malloc(sizeof(size_t)*hostArrayLen);
    size_t* rawHostArrayCpy = rawHostArray;
    for (int i = 0; i < hostArrayLen; i++) {
        *rawHostArrayCpy = api->getArrayCellInt(hostArray, i);
        rawHostArrayCpy++;
    }

    cl_int CL_err = clEnqueueWriteBuffer(queue, memObj, CL_TRUE, 0, hostArrayLen*sizeof(size_t), rawHostArray, 0, NULL, NULL);
    if (CL_err != CL_SUCCESS) {
        addLog(newLogItem("clEnqueueWriteBuffer", CL_err));
        api->returnInt(cframe, (size_t) 1);
        return RETURN_OK;
    }
    api->returnInt(cframe, (size_t) 0);
    free(rawHostArray);
    return RETURN_OK;    
}

INSTRUCTION_DEF readIntArray(FrameData* cframe) {
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_command_queue queue = (cl_command_queue) rawParam;
    rawParam = api->getParamInt(cframe, 1);
    cl_mem memObj = (cl_mem) rawParam;

    size_t hostArrayLen = api->getParamInt(cframe, 2);

    size_t* rawHostArray = (size_t*) malloc(sizeof(size_t)*hostArrayLen);
    size_t* toFree = rawHostArray;

    cl_int CL_err = clEnqueueReadBuffer(queue, memObj, CL_TRUE, 0, hostArrayLen*sizeof(size_t), rawHostArray, 0, NULL, NULL);
    if (CL_err != CL_SUCCESS) {
        addLog(newLogItem("clEnqueueReadBuffer", CL_err));
        api->returnEl(cframe, NULL);
        return RETURN_OK;
    }

    DanaEl* hostArray = api->makeArray(intArrayGT, hostArrayLen, NULL);
    for (int i = 0; i < hostArrayLen; i++) {
        api->setArrayCellInt(hostArray, i, *rawHostArray);
        rawHostArray++;
    }

    api->returnEl(cframe, hostArray);
    free(toFree);

    return RETURN_OK;    
}

INSTRUCTION_DEF writeFloatArray(FrameData* cframe) {
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_command_queue queue = (cl_command_queue) rawParam;
    rawParam = api->getParamInt(cframe, 1);
    cl_mem memObj = (cl_mem) rawParam;

    DanaEl* hostArray = api->getParamEl(cframe, 2);
    size_t hostArrayLen = api->getArrayLength(hostArray);

    float* rawHostArray = (float*) malloc(sizeof(float)*hostArrayLen);
    float* rawHostArrayCpy = rawHostArray;
    for (int i = 0; i < hostArrayLen; i++) {
        *rawHostArrayCpy = api->getArrayCellDec(hostArray, i);
        rawHostArrayCpy++;
    }

    cl_int CL_err = clEnqueueWriteBuffer(queue, memObj, CL_TRUE, 0, hostArrayLen*sizeof(float), rawHostArray, 0, NULL, NULL);
    if (CL_err != CL_SUCCESS) {
        addLog(newLogItem("clEnqueueWriteBuffer", CL_err));
        api->returnInt(cframe, (size_t) 1);
        return RETURN_OK;
    }
    api->returnInt(cframe, (size_t) 0);
    free(rawHostArray);
    return RETURN_OK;    
}

INSTRUCTION_DEF readFloatArray(FrameData* cframe) {
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_command_queue queue = (cl_command_queue) rawParam;
    rawParam = api->getParamInt(cframe, 1);
    cl_mem memObj = (cl_mem) rawParam;

    size_t hostArrayLen = api->getParamInt(cframe, 2);

    float* fromDevice = (float*) malloc(sizeof(float)*hostArrayLen);
    float* toFree = fromDevice;

    cl_int CL_err = clEnqueueReadBuffer(queue, memObj, CL_TRUE, 0, hostArrayLen*sizeof(float), fromDevice, 0, NULL, NULL);
    if (CL_err != CL_SUCCESS) {
        addLog(newLogItem("clEnqueueReadBuffer", CL_err));
        api->returnEl(cframe, NULL);
        return RETURN_OK;
    }


    DanaEl* danaArr = api->makeArray(decArrayGT, hostArrayLen, NULL);
    for (int i = 0; i < hostArrayLen; i++) {
        api->setArrayCellDec(danaArr, i, *fromDevice);
        fromDevice++;
    }

    api->returnEl(cframe, danaArr);
    free(toFree);

    return RETURN_OK;    
}

INSTRUCTION_DEF createMatrix(FrameData* cframe) {
    cl_int CL_err = CL_SUCCESS;
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_device_id device = (cl_device_id) rawParam;

    DANA_COMP* danaComp = (DANA_COMP*) api->getParamInt(cframe, 4);
    CONTEXT_LI* contextItem = getContextByDevice(danaComp, device);
    cl_context context = contextItem->context;

    rawParam = api->getParamInt(cframe, 1);
    size_t rows = (size_t) rawParam;

    rawParam = api->getParamInt(cframe, 2);
    size_t cols = (size_t) rawParam;

    rawParam = api->getParamInt(cframe, 3);
    size_t type = (size_t) rawParam;

    cl_image_desc desc = {CL_MEM_OBJECT_IMAGE2D, cols, rows, 0, 1, 0, 0, 0, 0, NULL};
    cl_image_format form;
    if (type == FLOAT) {
        form = (cl_image_format) {CL_R, CL_FLOAT};
    }
    else if (type == UINT) {
        form = (cl_image_format) {CL_R, CL_UNSIGNED_INT32};
    }
    else {
        api->returnInt(cframe, (size_t) 0);
        return RETURN_OK;
    }

    cl_mem newMatrix = clCreateImage(context, CL_MEM_READ_WRITE, &form, &desc, NULL, &CL_err);

    if (CL_err != CL_SUCCESS) {
        addLog(newLogItem("clCreateImage", CL_err));
        api->returnInt(cframe, (size_t) 0);
        return RETURN_OK;
    }

    api->returnInt(cframe, (size_t) newMatrix);

    return RETURN_OK;
}

INSTRUCTION_DEF writeIntMatrix(FrameData* cframe) {
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_command_queue queue = (cl_command_queue) rawParam;
    rawParam = api->getParamInt(cframe, 1);
    cl_mem memObj = (cl_mem) rawParam;

    DanaEl* hostMatrix = api->getParamEl(cframe, 2);
    size_t dim = 2; //only supporting 2d matricies
    size_t* dims = api->getArrayDimensions(hostMatrix, &dim);

    uint32_t* rawHostMatrix = (uint32_t*) malloc(sizeof(uint32_t)*dims[0]*dims[1]);
    uint32_t* rawHostMatrixCpy = rawHostMatrix;
    for (int i = 0; i < dims[0]; i++) {
        for (int j = 0; j < dims[1]; j++) {
            *rawHostMatrixCpy = api->getArrayCellInt(hostMatrix, (i*dims[1])+j);
            rawHostMatrixCpy++;
        }
    }

    size_t origin[] = {0, 0, 0};
    size_t region[] = {dims[1], dims[0], 1};
    cl_int CL_err = clEnqueueWriteImage(queue, memObj, CL_TRUE, origin, region, 0, 0, rawHostMatrix, 0, NULL, NULL);

    if (CL_err != CL_SUCCESS) {
        addLog(newLogItem("clEnqueueWriteImage", CL_err));
        api->returnInt(cframe, (size_t) 1);
        return RETURN_OK;
    }
    else {
    }
    api->returnInt(cframe, (size_t) 0);
    free(rawHostMatrix);
    return RETURN_OK;    
}

INSTRUCTION_DEF readIntMatrix(FrameData* cframe) {
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_command_queue queue = (cl_command_queue) rawParam;
    rawParam = api->getParamInt(cframe, 1);
    cl_mem memObj = (cl_mem) rawParam;

    DanaEl* matrixDims = api->getParamEl(cframe, 2);
    size_t hostMatrixLens[] = {api->getArrayCellInt(matrixDims, 0), api->getArrayCellInt(matrixDims, 1)};

    uint32_t* rawHostMatrix = (uint32_t*) malloc(sizeof(uint32_t)*hostMatrixLens[0]*hostMatrixLens[1]);
    uint32_t* toFree = rawHostMatrix;

    size_t origin[] = {0, 0, 0};
    size_t region[] = {hostMatrixLens[1], hostMatrixLens[0], 1};


    int CL_Err = clEnqueueReadImage(queue, memObj, CL_TRUE, origin, region, 0, 0, rawHostMatrix, 0, NULL, NULL);
    if (CL_Err != CL_SUCCESS) {
        addLog(newLogItem("clEnqueueReadImage", CL_Err));
        api->returnEl(cframe, NULL);
        return RETURN_OK;
    }

    DanaEl* hostMatrix = api->makeArrayMD(intMatrixGT, 2, hostMatrixLens, NULL);
    for (int i = 0; i < hostMatrixLens[0]; i++) {
        for (int j = 0; j < hostMatrixLens[1]; j++) {
            api->setArrayCellInt(hostMatrix, (i*hostMatrixLens[1])+j, *rawHostMatrix);
            rawHostMatrix++;
        }
    }

    api->returnEl(cframe, hostMatrix);
    free(toFree);

    return RETURN_OK;    
}

INSTRUCTION_DEF writeFloatMatrix(FrameData* cframe) {
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_command_queue queue = (cl_command_queue) rawParam;
    rawParam = api->getParamInt(cframe, 1);
    cl_mem memObj = (cl_mem) rawParam;

    DanaEl* hostMatrix = api->getParamEl(cframe, 2);
    size_t dim = 2; //only supporting 2d matricies
    size_t* dims = api->getArrayDimensions(hostMatrix, &dim);

    float* rawHostMatrix = (float*) malloc(sizeof(float)*dims[0]*dims[1]);
    float* rawHostMatrixCpy = rawHostMatrix;
    for (int i = 0; i < dims[0]; i++) {
        for (int j = 0; j < dims[1]; j++) {
            *rawHostMatrixCpy = api->getArrayCellDec(hostMatrix, (i*dims[1])+j);
            rawHostMatrixCpy++;
        }
    }

    size_t origin[] = {0, 0, 0};
    size_t region[] = {dims[1], dims[0], 1};
    cl_int CL_err = clEnqueueWriteImage(queue, memObj, CL_TRUE, origin, region, 0, 0, rawHostMatrix, 0, NULL, NULL);

    if (CL_err != CL_SUCCESS) {
        addLog(newLogItem("clEnqueueWriteImage", CL_err));
        api->returnInt(cframe, (size_t) 1);
        return RETURN_OK;
    }
    api->returnInt(cframe, (size_t) 0);
    free(rawHostMatrix);
    return RETURN_OK;    
}

INSTRUCTION_DEF readFloatMatrix(FrameData* cframe) {
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_command_queue queue = (cl_command_queue) rawParam;
    rawParam = api->getParamInt(cframe, 1);
    cl_mem memObj = (cl_mem) rawParam;

    DanaEl* matrixDims = api->getParamEl(cframe, 2);
    size_t hostMatrixLens[] = {api->getArrayCellInt(matrixDims, 0), api->getArrayCellInt(matrixDims, 1)};

    float* rawHostMatrix = (float*) malloc(sizeof(float)*hostMatrixLens[0]*hostMatrixLens[1]);
    float* toFree = rawHostMatrix;

    size_t origin[] = {0, 0, 0};
    size_t region[] = {hostMatrixLens[1], hostMatrixLens[0], 1};

    int CL_Err = clEnqueueReadImage(queue, memObj, CL_TRUE, origin, region, 0, 0, rawHostMatrix, 0, NULL, NULL);
    if (CL_Err != CL_SUCCESS) {
        addLog(newLogItem("clEnqueueReadImage", CL_Err));
        api->returnEl(cframe, NULL);
        return RETURN_OK;
    }

    DanaEl* hostMatrix = api->makeArrayMD(decMatrixGT, 2, hostMatrixLens, NULL);
    for (int i = 0; i < hostMatrixLens[0]; i++) {
        for (int j = 0; j < hostMatrixLens[1]; j++) {
            api->setArrayCellDec(hostMatrix, (i*hostMatrixLens[1])+j, *rawHostMatrix);
            rawHostMatrix++;
        }
    }

    api->returnEl(cframe, hostMatrix);
    free(toFree);
    return RETURN_OK;    
}

INSTRUCTION_DEF destroyMemoryArea(FrameData* cframe) {
    cl_int CL_err = CL_SUCCESS;
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_mem memObj = (cl_mem) rawParam; 
    //the next line seg faults if memObj has already been released...
    //averting this has been attemted in dana, but I'll leave this note
    //here just in case
    CL_err = clReleaseMemObject(memObj);
    if (CL_err != CL_SUCCESS) {
        addLog(newLogItem("clReleaseMemObject", CL_err));
        return RETURN_OK;
    }
    return RETURN_OK;
}

INSTRUCTION_DEF destroyContextSpace(FrameData* cframe) {
    size_t rawParam = api->getParamInt(cframe, 0);
    DANA_COMP* comp = (DANA_COMP*) rawParam;
    destroyContexts(comp);
    return RETURN_OK;
}

INSTRUCTION_DEF destroyQueue(FrameData* cframe) {
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_command_queue queue = (cl_command_queue) rawParam;
    clReleaseCommandQueue(queue);
    return RETURN_OK;
}

INSTRUCTION_DEF destroyProgram(FrameData* cframe) {
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_program prog = (cl_program) rawParam;
    clReleaseProgram(prog);
    return RETURN_OK;
}

/*
    * Input: .cl program source code
    * For each platform/context attached to a DANA_COMP attempt to build the program
    * If fails, print the compile errors
    * Return: Array of built program IDs including 0s for those that failed
*/
INSTRUCTION_DEF createProgram(FrameData* cframe) {
    cl_program prog = 0;
    cl_int CL_err = CL_SUCCESS;

    char** programStrings = (char**) malloc(sizeof(char*));
    char* programSource = x_getParam_char_array(api, cframe, 0);
    *programStrings = programSource;

    DANA_COMP* danaComp = (DANA_COMP*) api->getParamInt(cframe, 1);

    cl_device_id device = (cl_device_id) api->getParamInt(cframe, 2);

    CONTEXT_LI* contextItem = getContextByDevice(danaComp, device);

    prog = clCreateProgramWithSource(contextItem->context, 1, (const char**) programStrings, NULL, &CL_err);
    if (CL_err != CL_SUCCESS) {
        addLog(newLogItem("clCreateProgramWithSource", CL_err));
        api->returnInt(cframe, (size_t) 0);
        return RETURN_OK;
    }

    CL_err = CL_SUCCESS;
    CL_err = clBuildProgram(prog, contextItem->numOfDevices, contextItem->devices, NULL, NULL, NULL);
    if (CL_err != CL_SUCCESS) {
        addLog(newLogItem("clBuildProgram", CL_err));
        size_t len;
        char buf[2048];
        printf("CL_err = %d\n", CL_err);
        clGetProgramBuildInfo(prog, *(contextItem->devices), CL_PROGRAM_BUILD_LOG, sizeof(buf), buf, &len);
        printf("%s\n",buf);
        api->returnInt(cframe, (size_t) 0);
        return RETURN_OK;
    }

    api->returnInt(cframe, (size_t) prog);
    free(programStrings);

    return RETURN_OK;
}

/*
    * Input: program ID, number of parameters for the program, the opencl
    * memory objects that make up the parameters, the program name
    *
    * call opencl to create kernel
    * if successful call opencl to set kernel parameters
    *
    * if both succesful return kernel ID
    * if not return 0
*/
INSTRUCTION_DEF prepareKernel(FrameData* cframe) {
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_program program = (cl_program) rawParam;
    rawParam = api->getParamInt(cframe, 2);
    size_t paramCount = (size_t) rawParam;

    DanaEl* paramArray = api->getParamEl(cframe, 1);
    size_t* rawParamArray = (size_t*) malloc(sizeof(size_t)*paramCount);
    size_t* rawParamArrayCpy = rawParamArray;
    for (int i = 0; i < paramCount; i++) {
        *rawParamArrayCpy = (size_t) api->getArrayCellInt(paramArray, i);
        rawParamArrayCpy++;
    }

    char* progName = x_getParam_char_array(api, cframe, 3);

    cl_int CL_err = CL_SUCCESS;
    cl_kernel kernel = clCreateKernel(program, progName, &CL_err);

    if (CL_err != CL_SUCCESS) {
        addLog(newLogItem("clCreateKernel", CL_err));
        api->returnInt(cframe, (size_t) 0);
        return RETURN_OK;
    }

    rawParamArrayCpy = rawParamArray;
    for (int i = 0; i < paramCount; i++) {
        rawParamArrayCpy++;
    }

    CL_err = CL_SUCCESS;
    rawParamArrayCpy = rawParamArray;
    for (int i = 0; i < paramCount; i++) {
        CL_err = clSetKernelArg(kernel, i, sizeof(size_t), rawParamArrayCpy);
        if (CL_err != CL_SUCCESS) {
            addLog(newLogItem("clSetKernelArg", CL_err));
            api->returnInt(cframe, (size_t) 0);
            return RETURN_OK;
        }
        rawParamArrayCpy++;
    }
    CL_err = clSetKernelArg(kernel, paramCount, sizeof(size_t), ¶mCount);

    api->returnInt(cframe, (size_t) kernel);
    free(rawParamArray);

    return RETURN_OK;

}

INSTRUCTION_DEF runKernel(FrameData* cframe) {
    size_t rawParam = api->getParamInt(cframe, 0);
    cl_kernel kernel = (cl_kernel) rawParam;

    rawParam = api->getParamInt(cframe, 1);
    cl_command_queue queue = (cl_command_queue) rawParam;

    //create an amount of kernel threads that is
    //equivilent to the size and shape of the output
    //vector/matrix of the kernel parameters
    DanaEl* rawOutputDimentions = api->getParamEl(cframe, 2);

    size_t rawArrLen = api->getArrayLength(rawOutputDimentions);
    
    size_t* globalWorkers = (size_t*) malloc(sizeof(size_t)*rawArrLen);
    for(int i = 0; i < rawArrLen; i++) {
        *(globalWorkers+i) = api->getArrayCellInt(rawOutputDimentions, i);
    }

    //synchronisation handling
    cl_event* kernel_event = (cl_event*) malloc(sizeof(cl_event));

    cl_int CL_err = CL_SUCCESS;
    CL_err = clEnqueueNDRangeKernel(queue, kernel, rawArrLen, NULL, globalWorkers, NULL, 0, NULL, kernel_event);
    if (CL_err != CL_SUCCESS) {
        addLog(newLogItem("clEnqueueNDRangeKernel", CL_err));
        api->returnInt(cframe, (size_t) 1);
        return RETURN_OK;
    }

    //wait for kernel to execute before continuing
    clWaitForEvents(1, kernel_event);

    //clean up
    clReleaseEvent(*kernel_event);
    free(kernel_event);
    free(globalWorkers);
    clRetainKernel(kernel);

    api->returnInt(cframe, (size_t) 0);
    return RETURN_OK;
}

INSTRUCTION_DEF findPlatforms(void) {
    cl_int CL_err = CL_SUCCESS;
    cl_uint numPlatforms = 0;
    
    CL_err = clGetPlatformIDs( 0, NULL, &numPlatforms );
    
    if (CL_err == CL_SUCCESS) {
        printf("%u platform(s) found\n", numPlatforms);
    }
    else {
        printf("clGetPlatformIDs(%i)\n", CL_err);
    }

    return RETURN_OK;
}

Interface* load(CoreAPI* capi) {
    api = capi;

    initOpenCL();

    setInterfaceFunction("findPlatforms", findPlatforms);
    setInterfaceFunction("getComputeDeviceIDs", getComputeDeviceIDs);
    setInterfaceFunction("getComputeDevices", getComputeDevices);
    setInterfaceFunction("createContext", createContext);
    setInterfaceFunction("createAsynchQueue", createAsynchQueue);
    setInterfaceFunction("createSynchQueue", createSynchQueue);
    setInterfaceFunction("createArray", createArray);
    setInterfaceFunction("writeIntArray", writeIntArray);
    setInterfaceFunction("readIntArray", readIntArray);
    setInterfaceFunction("writeFloatArray", writeFloatArray);
    setInterfaceFunction("readFloatArray", readFloatArray);
    setInterfaceFunction("createMatrix", createMatrix);
    setInterfaceFunction("writeIntMatrix", writeIntMatrix);
    setInterfaceFunction("readIntMatrix", readIntMatrix);
    setInterfaceFunction("writeFloatMatrix", writeFloatMatrix);
    setInterfaceFunction("readFloatMatrix", readFloatMatrix);
    setInterfaceFunction("destroyMemoryArea", destroyMemoryArea);
    setInterfaceFunction("createProgram", createProgram);
    setInterfaceFunction("prepareKernel", prepareKernel);
    setInterfaceFunction("runKernel", runKernel);
    setInterfaceFunction("createContextSpace", createContextSpace);
    setInterfaceFunction("printLogs", printLogs);
    setInterfaceFunction("destroyContextSpace", destroyContextSpace);
    setInterfaceFunction("destroyProgram", destroyProgram);
    setInterfaceFunction("destroyQueue", destroyQueue);

    charArrayGT = api->resolveGlobalTypeMapping(getTypeDefinition("char[]"));
    stringArrayGT = api->resolveGlobalTypeMapping(getTypeDefinition("String[]"));
    stringItemGT = api->resolveGlobalTypeMapping(getTypeDefinition("String"));
    intArrayGT = api->resolveGlobalTypeMapping(getTypeDefinition("int[]"));
    intMatrixGT = api->resolveGlobalTypeMapping(getTypeDefinition("int[][]"));
    decArrayGT = api->resolveGlobalTypeMapping(getTypeDefinition("dec[]"));
    decMatrixGT = api->resolveGlobalTypeMapping(getTypeDefinition("dec[][]"));

    return getPublicInterface();
}

void unload() {
    api->decrementGTRefCount(charArrayGT);
    api->decrementGTRefCount(stringArrayGT);
    api->decrementGTRefCount(stringItemGT);
    api->decrementGTRefCount(intArrayGT);
    api->decrementGTRefCount(intMatrixGT);
    api->decrementGTRefCount(decArrayGT);
    api->decrementGTRefCount(decMatrixGT);
}
Revision history
To propose a new revision to this entity, use dana source put -uls your/new/version.c -n OpenCLLib -gni io.compute.Compute -apiv 17 -m "reason for update" -u yourUsername
Version 1 (this version) by ben
Notes for this version: Standard Library Initialisation