CULA and CuBlas with Delphi

General CULA Dense (LAPACK & BLAS) support and troubleshooting. Use this forum if you are having a general problem or have encountered a bug.

CULA and CuBlas with Delphi

Postby gustavo » Sat Jul 09, 2011 8:22 pm

Matrix/Vector Classes (dense) wrapping the main CuBlas and CULA functions for Delphi:

The Code is in this order: Example, Matrix Vector Classes Unit, CULA Unit and CuBlas Unit... Copy/paste and start playing with your GPU

It was tested with:
Delphi 7.0
CUDA Toolkit 4.0
CULA R12 (Premium for Double)
devdriver_4.0_winxp_32_270.81_general.exe
NVIDIA Quadro VCQ 4000 Professional Graphics Card VCQ4000-PB

Example:

Code: Select all
unit UnitCulaMtxVec;
   
    interface
   
    uses
      Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
      Dialogs, CULACuBlasMtxVec, StdCtrls;
   
    type
      // Demo form: builds two vectors and a square matrix on creation and
      // exercises the TVec/TMtx operations when Button1 is clicked, logging
      // every intermediate result to Memo1.
      TForm1 = class(TForm)
        Memo1: TMemo;      // output log
        Button1: TButton;  // triggers Button1Click
        procedure Button1Click(Sender: TObject);
        procedure FormClose(Sender: TObject; var Action: TCloseAction);
        procedure FormCreate(Sender: TObject);
      private
        { Private declarations }
      public
        { Public declarations }
       N: integer;   // problem size; set to 5 in FormCreate
       a, b: TVec;   // vectors of length N, created in FormCreate
       c: TMtx;      // N x N matrix, created in FormCreate
      end;
   
    var
      Form1: TForm1;
   
   
    implementation
   
    {$R *.dfm}
   
    procedure TForm1.FormCreate(Sender: TObject);
    var i,j: integer;
    begin
   
      N:= 5;
   
      a:= TVec.Create(N);
      b:= TVec.Create(N);
      c:= TMtx.Create(N);
   
      Memo1.Clear;
   
      Randomize;
   
      for i:= 0 to N-1 do
      begin
       a.Value[i]:= 1.0 + Random(100)/10;
       b.Value[i]:= 1.0 + Random(100)/10;
      end;
   
      Memo1.Lines.Add('Valor Vector a');
      for i:= 0 to N-1 do
        Memo1.Lines.Add(FloattoStr(a.Value[i]));
   
      Memo1.Lines.Add('Valor Vector b');
      for i:= 0 to N-1 do
        Memo1.Lines.Add(FloattoStr(b.Value[i]));
   
      Memo1.Lines.Add('Valor Matriz c');
      for i:= 0 to N-1 do
       for j:= 0 to N-1 do
        c.Value[i,j]:= 3.0 + Random(100)/10;
   
      Memo1.Lines.Add('Valor c');
      for i:= 0 to N-1 do
       for j:= 0 to N-1 do
        Memo1.Lines.Add(FloattoStr(c.Value[i,j]));
    end;
   
    procedure TForm1.FormClose(Sender: TObject; var Action: TCloseAction);
    begin
      a.Destroy;
      b.Destroy;
      c.Destroy;
    end;
   
    procedure TForm1.Button1Click(Sender: TObject);
    var i,j: integer;
    begin
      a.Add(b);
      Memo1.Lines.Add('Suma a:= a+b');
      for i:= 0 to N-1 do
        Memo1.Lines.Add(FloattoStr(a.Value[i]));
   
      a.Sub(b);
      Memo1.Lines.Add('Resta a:= a-b');
      for i:= 0 to N-1 do
        Memo1.Lines.Add(FloattoStr(a.Value[i]));
   
      Memo1.Lines.Add('Norm a.Norm2');
      Memo1.Lines.Add(FloattoStr(a.Norm2));
   
      Memo1.Lines.Add('Dot a.Dot(b)');
      Memo1.Lines.Add(FloattoStr(a.Dot(B)));
   
      b.Scale(1.5);
      Memo1.Lines.Add('Valor b scale 1.5X');
      for i:= 0 to N-1 do
        Memo1.Lines.Add(FloattoStr(b.Value[i]));
   
      c.Scale(3);
      Memo1.Lines.Add('Valor c scale 3X');
      for i:= 0 to N-1 do
       for j:= 0 to N-1 do
        Memo1.Lines.Add(FloattoStr(c.Value[i,j]));
   
      a:= c.Prod(b);
      Memo1.Lines.Add('Prod a:=c.Prod(b)');
      for i:= 0 to N-1 do
        Memo1.Lines.Add(FloattoStr(a.Value[i]));
   
      a:= c.Solve(B);
      Memo1.Lines.Add('Solve a:=c.Solve(b)');
      for i:= 0 to N-1 do
        Memo1.Lines.Add('a[' + InttoStr(i) + '] := '+FloattoStr(a.Value[i]));
   
    end;
   
    end.
Matrix/Vector Classes:

Code: Select all
unit CULACuBlasMtxVec;
   
    interface
   
    uses  SysUtils, CuBlas, Math, Classes, CULA, Windows;
   
    type
      // Dense vector of Double values kept in host memory; the arithmetic
      // methods copy the data into cuBLAS buffers, run the matching cuBLAS
      // routine and copy the result back.
      TVec = Class
      private
        function GetValue(index: integer): Double;
        procedure SetValue(index: integer; const Value: Double);
      protected
      FLength: integer;          // number of entries, set by Create
      Values: Array of Double;   // host storage, zero-based
      public
      constructor Create(n: integer);
      destructor Destroy; override;
      // Self := Self + Vec (cublasDaxpy with alpha 1.0); returns Self.
      function Add(Vec: TVec): TVec;
      // Self := Self - Vec (cublasDaxpy with alpha -1.0); returns Self.
      function Sub(Vec: TVec): TVec;
      // Result of cublasDdot for Vec and Self.
      function Dot(Vec: TVec): Double;
      // Result of cublasDnrm2 for Self.
      function Norm2: Double;
      // Scales Self in place by "value" (cublasDscal).
      procedure Scale(value: Double);
      // Unchecked element access into Values.
      property Value[index: integer]: Double read GetValue write SetValue;
      end;
   
      // Dense square matrix of Double values stored as one flat host array
      // of Frank*Frank entries, addressed through IDX2.
      TMtx = Class
      private
        // NOTE(review): "col" is passed as the first (i) argument of IDX2 and
        // "row" as the second (j) - confirm the intended naming against IDX2.
        function GetValue(col, row: integer): Double;
        procedure SetValue(col, row: integer; const Value: Double);
      protected
      FLength: integer;          // total number of entries (n*n)
      Frank: integer;            // matrix order n, used as leading dimension
      Values: Array of Double;   // flat host storage
      public
      constructor Create(n: integer);
      destructor Destroy; override;
      // Self := Self + Mtx over all entries (cublasDaxpy, alpha 1.0); returns Self.
      function Add(Mtx: TMtx): TMtx;
      // Self := Self - Mtx over all entries (cublasDaxpy, alpha -1.0); returns Self.
      function Sub(Mtx: TMtx): TMtx;
      // Result of cublasDnrm2 over all FLength stored entries.
      function Norm2: Double;
      // Matrix-vector product via cublasDgemv('n', ...); returns a NEW TVec
      // that the caller must free.
      function Prod(Vec: TVec): TVec;
      // Scales every entry in place by Value (cublasDscal).
      procedure Scale(Value: Double);
      // Runs culaDgesv on a worker thread with B as right-hand side; returns
      // a NEW TVec that the caller must free.
      function Solve(B: TVec): TVec;
      property Value[col, row: integer]: Double read GetValue write SetValue;
      end;
   
    // Unit-wide scratch pointers filled by cublasAlloc inside the TVec/TMtx
    // methods. Being globals, they are shared by every instance and every
    // call, so the methods that use them must not run concurrently.
    var
      SelfValues, VecValues, VecValues1: PDoubleVector;
      // Not referenced by the code visible in this unit.
      Ipiv: PIntegerVector;
   
    implementation
   
    type
    // Unit-private holder for one linear system: the coefficient matrix A and
    // the vector X that is handed to the solver. Both objects are owned (and
    // destroyed) by the TSystem instance.
    TSystem = Class
      A: TMtx;
      X: TVec;
      constructor Create(n: integer);
      destructor Destroy; override;
      end;
   
    { TculaVec }
   
    constructor TVec.Create(n: integer);
    begin
      inherited Create;
      Flength:= n;
      SetLength(values, n);
    end;
   
    function TVec.GetValue(index: integer): Double;
    begin
      result:= Values[index];
    end;
   
    procedure TVec.SetValue(index: integer; const Value: Double);
    begin
      Values[index]:= Value;
    end;
   
    function TVec.Add(Vec: TVec): TVec;
    begin
      Result:= TVec(Self);
   
      statuscublas:= cublasAlloc(Flength, sizeof(Self.Values[0]), @SelfValues);
      statuscublas:= cublasAlloc(Flength, sizeof(Vec.Values[0]), @VecValues);
   
      statuscublas:= cublasSetVector(Flength, sizeof(Self.Values[0]),@Self.Values[0],1,@SelfValues[0],1);
      statuscublas:= cublasSetVector(Flength, sizeof(Vec.Values[0]),@Vec.Values[0],1,@VecValues[0],1);
   
      cublasDaxpy(Flength,1.0,VecValues[0],1,SelfValues[0],1);
   
      statuscublas:= cublasGetVector(Flength, sizeof(Self.Values[0]),@SelfValues[0],1,@Result.Values[0],1);
   
      statuscublas:= cublasFree(@VecValues[0]);
      statuscublas:= cublasFree(@SelfValues[0]);
   
    end;
   
    function TVec.Sub(Vec: TVec): TVec;
    begin
      Result:= TVec(Self);
   
      statuscublas:= cublasAlloc(Flength, sizeof(Self.Values[0]), @SelfValues);
      statuscublas:= cublasAlloc(Flength, sizeof(Vec.Values[0]), @VecValues);
   
      statuscublas:= cublasSetVector(Flength, sizeof(Self.Values[0]),@Self.Values[0],1,@SelfValues[0],1);
      statuscublas:= cublasSetVector(Flength, sizeof(Vec.Values[0]),@Vec.Values[0],1,@VecValues[0],1);
   
      cublasDaxpy(Flength,-1.0,VecValues[0],1,SelfValues[0],1);
   
      statuscublas:= cublasGetVector(Flength, sizeof(Self.Values[0]),@SelfValues[0],1,@Result.Values[0],1);
   
      statuscublas:= cublasFree(@VecValues[0]);
      statuscublas:= cublasFree(@SelfValues[0]);
   
    end;
   
    function TVec.Norm2: Double;
    begin
   
      statuscublas:= cublasAlloc(Flength, sizeof(Self.Values[0]), @SelfValues);
   
      statuscublas:= cublasSetVector(Flength, sizeof(Self.Values[0]),@Self.Values[0],1,@SelfValues[0],1);
   
      result:= cublasDnrm2(Flength, SelfValues[0], 1);
   
      statuscublas:= cublasFree(@SelfValues[0]);
   
    end;
   
    destructor TVec.Destroy;
    begin
      Finalize(Values,0);
      inherited;
    end;
   
    function TVec.Dot(Vec: TVec): Double;
    begin
     
      statuscublas:= cublasAlloc(Flength, sizeof(Self.Values[0]), @SelfValues);
      statuscublas:= cublasAlloc(Flength, sizeof(Vec.Values[0]), @VecValues);
   
      statuscublas:= cublasSetVector(Flength, sizeof(Self.Values[0]),@Self.Values[0],1,@SelfValues[0],1);
      statuscublas:= cublasSetVector(Flength, sizeof(Vec.Values[0]),@Vec.Values[0],1,@VecValues[0],1);
   
      result:= cublasDdot(Flength,VecValues[0],1,SelfValues[0],1);
   
      statuscublas:= cublasFree(@VecValues[0]);
      statuscublas:= cublasFree(@SelfValues[0]);
   
    end;
   
    procedure TVec.Scale(value: Double);
    begin
   
      statuscublas:= cublasAlloc(Flength, sizeof(Self.Values[0]), @SelfValues);
   
      statuscublas:= cublasSetVector(Flength, sizeof(Self.Values[0]),@Self.Values[0],1,@SelfValues[0],1);
   
      cublasDscal(Self.FLength, value, SelfValues[0], 1);
   
      statuscublas:= cublasGetVector(Flength, sizeof(Self.Values[0]),@SelfValues[0],1,@Self.Values[0],1);
   
    end;
   
    { TculaMtx }
   
    function TMtx.Add(Mtx: TMtx): TMtx;
    begin
      Result:= TMtx(Self);
   
      statuscublas:= cublasAlloc(Flength, sizeof(Self.Values[0]), @SelfValues);
      statuscublas:= cublasAlloc(Flength, sizeof(Mtx.Values[0]), @VecValues);
   
      statuscublas:= cublasSetVector(Flength, sizeof(Self.Values[0]),@Self.Values[0],1,@SelfValues[0],1);
      statuscublas:= cublasSetVector(Flength, sizeof(Mtx.Values[0]),@Mtx.Values[0],1,@VecValues[0],1);
   
      cublasDaxpy(Flength,1.0,VecValues[0],1,SelfValues[0],1);
   
      statuscublas:= cublasGetVector(Flength, sizeof(Self.Values[0]),@SelfValues[0],1,@Result.Values[0],1);
   
      statuscublas:= cublasFree(@VecValues[0]);
      statuscublas:= cublasFree(@SelfValues[0]);
   
    end;
   
    constructor TMtx.Create(n: integer);
    begin
      Frank:= n;
      Flength:= n*n;
      SetLength(values, Flength);
    end;
   
    destructor TMtx.Destroy;
    begin
      Finalize(Values,0);
      inherited;
    end;
   
    function TMtx.GetValue(col, row: integer): Double;
    begin
      result:= Values[IDX2(col, row, Frank)];
    end;
   
    function TMtx.Norm2: Double;
    begin
   
      statuscublas:= cublasAlloc(Flength, sizeof(Self.Values[0]), @SelfValues);
   
      statuscublas:= cublasSetVector(Flength, sizeof(Self.Values[0]),@Self.Values[0],1,@SelfValues[0],1);
   
      result:= cublasDnrm2(Flength, SelfValues[0], 1);
   
      statuscublas:= cublasFree(@SelfValues[0]);
   
    end;
   
    function TMtx.Prod(Vec: TVec): TVec;
    var i: integer;
    begin
      Result:= TVec.Create(Vec.FLength);
   
      statuscublas:= cublasAlloc(Flength, sizeof(Self.Values[0]), @SelfValues);
      statuscublas:= cublasAlloc(Vec.Flength, sizeof(Vec.Values[0]), @VecValues);
      statuscublas:= cublasAlloc(Result.Flength, sizeof(Vec.Values[0]), @VecValues1);
   
      statuscublas:= cublasSetVector(Flength, sizeof(Self.Values[0]),@Self.Values[0],1,@SelfValues[0],1);
      statuscublas:= cublasSetVector(Vec.Flength, sizeof(Vec.Values[0]),@Vec.Values[0],1,@VecValues[0],1);
      statuscublas:= cublasSetVector(Result.Flength, sizeof(Result.Values[0]),@Result.Values[0],1,@VecValues1[0],1);
   
      cublasDgemv('n',Vec.Flength,Vec.Flength,1.0,SelfValues[0],Vec.FLength,VecValues[0],1,0.0,VecValues1[0],1);
   
      statuscublas:= cublasGetVector(Vec.Flength, sizeof(Vec.Values[0]),@VecValues1[0],1,@Result.Values[0],1);
   
      statuscublas:= cublasFree(@SelfValues[0]);
      statuscublas:= cublasFree(@VecValues[0]);
      statuscublas:= cublasFree(@VecValues1[0]);
   
    end;
   
    procedure TMtx.Scale(value: Double);
    begin
   
      statuscublas:= cublasAlloc(Flength, sizeof(Self.Values[0]), @SelfValues);
   
      statuscublas:= cublasSetVector(Flength, sizeof(Self.Values[0]),@Self.Values[0],1,@SelfValues[0],1);
   
      cublasDscal(Self.FLength, value, SelfValues[0], 1);
   
      statuscublas:= cublasGetVector(Flength, sizeof(Self.Values[0]),@SelfValues[0],1,@Self.Values[0],1);
   
    end;
   
    procedure TMtx.SetValue(col, row: integer; const Value: Double);
    begin
      Values[IDX2(col, row, Frank)]:= Value;
    end;
   
    // The system currently being solved. TMtx.Solve fills it in and SolveSys
    // (running on the worker thread) reads it; being a single global, only one
    // Solve call may be in flight at a time.
    var
     AX: TSystem;
   
    { TSystem }
   
    constructor TSystem.Create(n: integer);
    begin
      A:= TMtx.Create(0);
      X:= TVec.Create(0);
    end;
   
    destructor TSystem.Destroy;
    begin
      A.Destroy;
      X.Destroy;
      inherited;
    end;
   
    procedure SolveSys;
    var N, NRHS: integer;
        IPIVec: array of Integer;
    begin
   
      N:= AX.X.Flength;
      NRHS:= 1;
   
      SetLength(IPIVec,N);
   
      statuscula := culaDgesv(N, NRHS, AX.A.Values[0], N, IPIVec[0], AX.X.Values[0], N);
   
      ExitThread(0);
    end;
   
    function TMtx.Solve(B: TVec): TVec;
    var N: integer;
        thr: THandle;
        thrID: DWORD;
    begin
      N:= Frank;
   
      Result:= TVec.Create(N);
   
      Result.Values:= Copy(B.Values,0,N);
   
      AX:= TSystem.Create(N);
      AX.A.Values:= Self.Values;
      AX.X.Values:= Result.Values;
   
      thr := CreateThread(nil, 0, @SolveSys, 0, 0, thrID);
      WaitForSingleObject(thr, INFINITE);
   
      AX.Destroy;
    end;
   
    function TMtx.Sub(Mtx: TMtx): TMtx;
    begin
      Result:= TMtx(Self);
   
      statuscublas:= cublasAlloc(Flength, sizeof(Self.Values[0]), @SelfValues);
      statuscublas:= cublasAlloc(Flength, sizeof(Mtx.Values[0]), @VecValues);
   
      statuscublas:= cublasSetVector(Flength, sizeof(Self.Values[0]),@Self.Values[0],1,@SelfValues[0],1);
      statuscublas:= cublasSetVector(Flength, sizeof(Mtx.Values[0]),@Mtx.Values[0],1,@VecValues[0],1);
   
      cublasDaxpy(Flength,-1.0,VecValues[0],1,SelfValues[0],1);
   
      statuscublas:= cublasGetVector(Flength, sizeof(Self.Values[0]),@SelfValues[0],1,@Result.Values[0],1);
   
      statuscublas:= cublasFree(@VecValues[0]);
      statuscublas:= cublasFree(@SelfValues[0]);
   
    end;
   
    end.
Last edited by gustavo on Sat Jul 09, 2011 8:30 pm, edited 1 time in total.
gustavo
CULA Premium
 
Posts: 5
Joined: Fri Jun 17, 2011 7:03 pm

Re: CULA and CuBlas with Delphi

Postby gustavo » Sat Jul 09, 2011 8:24 pm

CULA Unit (single and double precision; for double you need the Premium version):

Code: Select all
unit CULA;
    (*
     * Copyright (C) 2009-2010 EM Photonics, Inc.  All rights reserved.
     *
     * NOTICE TO USER:   
     *
     * This source code is subject to EM Photonics ownership rights under U.S. and
     * international Copyright laws.  Users and possessors of this source code may
     * not redistribute this code without the express written consent of EM
     * Photonics, Inc.
     *
     * EM PHOTONICS MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
     * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED
     * WARRANTY OF ANY KIND.  EM PHOTONICS DISCLAIMS ALL WARRANTIES WITH REGARD TO
     * THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
     * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL EM
     * PHOTONICS BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
     * DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
     * PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
     * ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
     * SOURCE CODE.
     *
     * U.S. Government End Users.   This source code is a "commercial item" as that
     * term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of "commercial
     * computer  software"  and "commercial computer software documentation" as
     * such terms are  used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the
     * U.S. Government only as a commercial end item.  Consistent with 48
     * C.F.R.12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
     * U.S. Government End Users acquire the source code with only those rights set
     * forth herein.
     *
     * Any use of this source code in individual and commercial software must
     * include, in the user documentation and internal comments to the code, the
     * above Disclaimer and U.S. Government End Users Notice.
     *
     * implemented in Delphi by Gustavo Sanchez 22/Jun/2011
     * only single and double
     *)
   
    interface
   
    uses SysUtils;
   
    type
    culaStatus =
    (
        culaNoError,                       // No error
        culaNotInitialized,                // CULA has not been initialized
        culaNoHardware,                    // No hardware is available to run
        culaInsufficientRuntime,           // CUDA runtime or driver is not supported
        culaInsufficientComputeCapability, // Available GPUs do not support the requested operation
        culaInsufficientMemory,            // There is insufficient memory to continue
        culaFeatureNotImplemented,         // The requested feature has not been implemented
        culaArgumentError,                 // An invalid argument was passed to a function
        culaDataError,                     // An operation could not complete because of singular data
        culaBlasError,                     // A blas error was encountered
        culaRuntimeError                   // A runtime error has occurred
    );
   
    (**
     * @brief Provides extended error information for CULA functions.
     * When negative, this value specifies an argument position with an invalid
     * value.
     *)
   
    // Integer aliases used by the status/error-reporting functions.
    info_t= integer;
    culaInfo = integer;
    culaVersion = integer;
   
    // host types
    culaFloat= Single;
    PculaFloat= ^culaFloat;
    culaDouble= Double;
    PculaDouble= ^culaDouble;
    culaInt= Integer;
    PculaInt= ^culaInt;
   
    // device types
    // (declared as plain aliases of the host types above)
    culaDeviceFloat= culaFloat;
    PculaDeviceFloat= ^culaDeviceFloat;
    culaDeviceDouble= culaDouble;
    PculaDeviceDouble= ^culaDeviceDouble;
    culaDeviceInt= culaInt;
    PculaDeviceInt= ^culaDeviceInt;
   
    // Complex numbers as a pair of components (x, y).
    // NOTE(review): presumably x = real part, y = imaginary part - confirm
    // against the CULA headers.
    culaFloatComplex = record
      x, y: culaFloat;
     end;
    PculaFloatComplex = ^culaFloatComplex;
   
    culaDoubleComplex = record
      x, y: culaDouble;
     end;
    PculaDoubleComplex = ^culaDoubleComplex;
   
    // One-element array types and pointers to them. Only index 0 is inside
    // the declared bounds, so any other index needs range checking off.
    TSingleVector = array[0..0] of Single;
    PSingleVector = ^TSingleVector;
    TDoubleVector = array[0..0] of Double;
    PDoubleVector = ^TDoubleVector;
    TIntegerVector = array[0..0] of Integer;
    PIntegerVector = ^TIntegerVector;
   
    const
      // Library from which every "external CULADLL" routine below is imported.
      CULADLL = 'cula.dll';
   
    var
      // Unit-wide variable for storing the status returned by a CULA call.
      statuscula: culaStatus;
     
     
   
   
    function IDX2(i, j, rank: integer): integer; {(((j)*(lead_dim))+(i)) }
   
    (**
     * @brief Initializes CULA
     * Must be called before using any other function.  Some functions have an
     * exception to this rule:  culaGetDeviceCount, culaSelectDevice
     *
     * @return culaNoError on a successful initialization or the culaStatus enum
     * that specifies an error
     *)
    function culaInitialize: culaStatus; stdcall; external CULADLL name 'culaInitialize';
   
    (**
     * @brief Shuts down CULA
     * Must be called to deallocate CULA internal data
     *)
    procedure culaShutdown; stdcall; external CULADLL name 'culaShutdown';
   
    (**
     * @brief Returns the last status code returned from a CULA function
     *
     * @return The last CULA status code
     *)
    function culaGetLastStatus: culaStatus; stdcall; external CULADLL name 'culaGetLastStatus';
   
    (**
     * @brief Associates a culaStatus enum with a readable error string
     *
     * @param e A culaStatus error code
     *
     * @return A string that corresponds with the specified culaStatus enum
     *)
    function culaGetStatusString( e: culaStatus): PChar; stdcall; external CULADLL name 'culaGetStatusString';
   
    (**
     * @brief This function is used to provide extended functionality that LAPACK's
     * info parameter typically provides
     *
     * @return Extended information about the last error or zero if it is
     * unavailable
     *)
    function culaGetErrorInfo: info_t; stdcall; external CULADLL name 'culaGetErrorInfo';
   
    (**
     * @brief Associates a culaStatus and info_t with a readable error string
     *
     * @param e A culaStatus error code
     * @param i An info_t error code
     * @param buf Pointer to a buffer into which information will be printed
     * @param bufsize The size of buf, printed information will not exceed bufsize
     *
     * @return culaNoError on a successful error report or culaArgumentError on an
     * invalid argument to this function
     *)
    function culaGetErrorInfoString(e: culaStatus; i: info_t; buf: PChar; bufsize: integer): culaStatus ;
                                   stdcall; external CULADLL name 'culaGetErrorInfoString';
   
    (**
     * @brief Releases any memory buffers stored internally by CULA
     *)
    procedure culaFreeBuffers; stdcall; external CULADLL name 'culaFreeBuffers';
   
    (**
     * @brief Reports the number of GPU devices
     * Can be called before culaInitialize
     *
     * @param dev Variable that receives the number of devices
     *
     * @return culaNoError on success, culaArgumentError on invalid pointer
     *)
    function culaGetDeviceCount(var dev: Integer): culaStatus; stdcall; external CULADLL name 'culaGetDeviceCount';
   
    (**
     * @brief Selects a device with which CULA will operate
     * To bind without error, this function must be called before culaInitialize
     *
     * @param dev Specifies the device id of the GPU device
     *
     * @return culaNoError on success, culaArgumentError on an invalid device id,
     * culaRuntimeError if the running thread has already been bound to a GPU device
     *)
    function culaSelectDevice(dev: integer): culaStatus; stdcall; external CULADLL name 'culaSelectDevice';
   
    (**
     * @brief Reports the id of the GPU device executing CULA
     *
     * @param dev Pointer to receive the GPU device number
     *
     * @return culaNoError on success, culaArgumentError on invalid pointer
     *)
    function culaGetExecutingDevice(var dev: integer): culaStatus; stdcall; external CULADLL name 'culaGetExecutingDevice';
   
    (**
     * @brief Prints information to a buffer about a specified device
     *
     * @param dev CUDA device id to print information about
     * @param buf Pointer to a buffer into which information will be printed
     * @param bufsize The size of buf, printed information will not exceed bufsize
     *
     * @return culaNoError on success, culaArgumentError on invalid buf pointer,
     * invalid device id, or invalid bufsize
     *)
    function culaGetDeviceInfo(dev: integer; buf: PChar; bufsize: Integer): culaStatus; stdcall; external CULADLL name 'culaGetDeviceInfo';
   
    (**
     * @brief Allocates memory on the device in a pitch that is optimal for CULA
     *
     * @param mem Pointer to which a newly allocated buffer will be assigned
     * @param pitch The pitch of the allocation in elements (where *pitch >= rows)
     * @param rows The number of rows of the matrix
     * @param cols The number of columns of the matrix
     * @param elesize The size in bytes of the desired element
     *
     * @return culaNoError on successful allocation, culaInsufficientMemory on failure
     *)
    function culaDeviceMalloc(mem: Pointer; var pitch: integer; rows: integer; cols: integer; elesize: integer): culaStatus; stdcall; external CULADLL name 'culaDeviceMalloc';
   
   
    (**
     * @brief Frees memory that has been allocated with culaDeviceMalloc
     *
     * @param mem Pointer to a buffer that is to be freed
     *
     * @return culaNoError on successful free, culaArgumentError on failure
     *)
    function culaDeviceFree(mem: Pointer): culaStatus; stdcall; external CULADLL name 'culaDeviceFree';
   
   
    (*  culapack.h functions only single and double  *)
   
      function culaDbdsqr(uplo: char; n: Integer; ncvt: Integer; nru: Integer; ncc: Integer;
        var d: culaDouble; var e: culaDouble; var vt: culaDouble; ldvt: Integer; var u: culaDouble;
        ldu: Integer; var c: culaDouble; ldc: Integer): culaStatus; stdcall; external CULADLL name 'culaDbdsqr';
      function culaDgbtrf(m: Integer; n: Integer; kl: Integer; ku: Integer; var a: culaDouble;
        lda: Integer; var ipiv: culaInt): culaStatus; stdcall; external CULADLL name 'culaDgbtrf';
      function culaDgeNancheck(m: Integer; n: Integer; var a: culaDouble; lda: Integer): culaStatus; stdcall; external CULADLL name 'culaDgeNancheck';
      function culaDgeTranspose(m: Integer; n: Integer; var a: culaDouble; lda: Integer;
        var b: culaDouble; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaDgeTranspose';
      function culaDgeTransposeInplace(n: Integer; var a: culaDouble; lda: Integer): culaStatus; stdcall; external CULADLL name 'culaDgeTransposeInplace';
      function culaDgebrd(m: Integer; n: Integer; var a: culaDouble; lda: Integer;
        var d: culaDouble; var e: culaDouble; var tauq: culaDouble; var taup: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDgebrd';
      function culaDgeev(jobvl: char; jobvr: char; n: Integer; var a: culaDouble; lda: Integer;
        var wr: culaDouble; var wi: culaDouble; var vl: culaDouble; ldvl: Integer; var vr: culaDouble;
        ldvr: Integer): culaStatus; stdcall; external CULADLL name 'culaDgeev';
      function culaDgehrd(n: Integer; ilo: Integer; ihi: Integer; var a: culaDouble;
        lda: Integer; var tau: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDgehrd';
      function culaDgelqf(m: Integer; n: Integer; var a: culaDouble; lda: Integer;
        var tau: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDgelqf';
      function culaDgels(trans: char; m: Integer; n: Integer; nrhs: Integer; var a: culaDouble;
        lda: Integer; var b: culaDouble; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaDgels';
      function culaDgeqlf(m: Integer; n: Integer; var a: culaDouble; lda: Integer;
        var tau: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDgeqlf';
      function culaDgeqrf(m: Integer; n: Integer; var a: culaDouble; lda: Integer;
        var tau: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDgeqrf';
      function culaDgeqrs(m: Integer; n: Integer; nrhs: Integer; var a: culaDouble;
        lda: Integer; var tau: culaDouble; var b: culaDouble; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaDgeqrs';
      function culaDgerqf(m: Integer; n: Integer; var a: culaDouble; lda: Integer;
        var tau: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDgerqf';
   
      function culaDgesv(n: Integer; nrhs: Integer; var a: culaDouble; lda: Integer;
        var ipiv: culaInt; var b: culaDouble; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaDgesv';
   
      function culaDgesvd(jobu: char; jobvt: char; m: Integer; n: Integer; var a: culaDouble;
        lda: Integer; var s: culaDouble; var u: culaDouble; ldu: Integer; var vt: culaDouble;
        ldvt: Integer): culaStatus; stdcall; external CULADLL name 'culaDgesvd';
      function culaDgetrf(m: Integer; n: Integer; var a: culaDouble; lda: Integer;
        var ipiv: culaInt): culaStatus; stdcall; external CULADLL name 'culaDgetrf';
      function culaDgetri(n: Integer; var a: culaDouble; lda: Integer; var ipiv: culaInt): culaStatus; stdcall; external CULADLL name 'culaDgetri';
      function culaDgetrs(trans: char; n: Integer; nrhs: Integer; var a: culaDouble;
        lda: Integer; var ipiv: culaInt; var b: culaDouble; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaDgetrs';
      function culaDgglse(m: Integer; n: Integer; p: Integer; var a: culaDouble; lda: Integer;
        var b: culaDouble; ldb: Integer; var c: culaDouble; var d: culaDouble; var x: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDgglse';
      function culaDggrqf(m: Integer; p: Integer; n: Integer; var a: culaDouble; lda: Integer;
        var taua: culaDouble; var b: culaDouble; ldb: Integer; var taub: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDggrqf';
      function culaDlacpy(uplo: char; m: Integer; n: Integer; var a: culaDouble; lda: Integer;
        var b: culaDouble; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaDlacpy';
      function culaDlag2s(m: Integer; n: Integer; var a: culaDouble; lda: Integer;
        var sa: culaFloat; ldsa: Integer): culaStatus; stdcall; external CULADLL name 'culaDlag2s';
      function culaDlar2v(n: Integer; var x: culaDouble; var y: culaDouble; var z: culaDouble;
        incx: Integer; var c: culaDouble; var s: culaDouble; incc: Integer): culaStatus; stdcall; external CULADLL name 'culaDlar2v';
      function culaDlarfb(side: char; trans: char; direct: char; storev: char; m: Integer;
        n: Integer; k: Integer; var v: culaDouble; ldv: Integer; var t: culaDouble; ldt: Integer;
        var c: culaDouble; ldc: Integer): culaStatus; stdcall; external CULADLL name 'culaDlarfb';
      function culaDlarfg(n: Integer; var alpha: culaDouble; var x: culaDouble; incx: Integer;
        var tau: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDlarfg';
      function culaDlargv(n: Integer; var x: culaDouble; incx: Integer; var y: culaDouble;
        incy: Integer; var c: culaDouble; incc: Integer): culaStatus; stdcall; external CULADLL name 'culaDlargv';
      function culaDlartv(n: Integer; var x: culaDouble; incx: Integer; var y: culaDouble;
        incy: Integer; var c: culaDouble; var s: culaDouble; incc: Integer): culaStatus; stdcall; external CULADLL name 'culaDlartv';
      function culaDlascl(typee: char; kl: Integer; ku: Integer; cfrom: culaDouble;
        cto: culaDouble; m: Integer; n: Integer; var a: culaDouble; lda: Integer): culaStatus; stdcall; external CULADLL name 'culaDlascl';
      function culaDlaset(uplo: char; m: Integer; n: Integer; alpha: culaDouble; beta: culaDouble;
        var a: culaDouble; lda: Integer): culaStatus; stdcall; external CULADLL name 'culaDlaset';
      function culaDlasr(side: char; pivot: char; direct: char; m: Integer; n: Integer;
        var c: culaDouble; var s: culaDouble; var a: culaDouble; lda: Integer): culaStatus; stdcall; external CULADLL name 'culaDlasr';
      function culaDlat2s(uplo: char; n: Integer; var a: culaDouble; lda: Integer;
        var sa: culaFloat; ldsa: Integer): culaStatus; stdcall; external CULADLL name 'culaDlat2s';
      function culaDorgbr(vect: char; m: Integer; n: Integer; k: Integer; var a: culaDouble;
        lda: Integer; var tau: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDorgbr';
      function culaDorghr(n: Integer; ilo: Integer; ihi: Integer; var a: culaDouble;
        lda: Integer; var tau: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDorghr';
      function culaDorglq(m: Integer; n: Integer; k: Integer; var a: culaDouble; lda: Integer;
        var tau: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDorglq';
      function culaDorgql(m: Integer; n: Integer; k: Integer; var a: culaDouble; lda: Integer;
        var tau: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDorgql';
      function culaDorgqr(m: Integer; n: Integer; k: Integer; var a: culaDouble; lda: Integer;
        var tau: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDorgqr';
      function culaDorgrq(m: Integer; n: Integer; k: Integer; var a: culaDouble; lda: Integer;
        var tau: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDorgrq';
      // Double-precision culaDorm* imports (stdcall, bound to the same-named CULADLL exports).
      // All four share one signature: side/trans flags and m, n, k by value; `a`, `tau`
      // and `c` by reference with leading dimensions lda and ldc by value.
      // NOTE(review): names follow the LAPACK xORMxx family (applying an orthogonal factor
      // to a matrix C) - confirm against the CULA reference.
      function culaDormlq(side: char; trans: char; m: Integer; n: Integer; k: Integer;
        var a: culaDouble; lda: Integer; var tau: culaDouble; var c: culaDouble; ldc: Integer): culaStatus; stdcall; external CULADLL name 'culaDormlq';
      function culaDormql(side: char; trans: char; m: Integer; n: Integer; k: Integer;
        var a: culaDouble; lda: Integer; var tau: culaDouble; var c: culaDouble; ldc: Integer): culaStatus; stdcall; external CULADLL name 'culaDormql';
      function culaDormqr(side: char; trans: char; m: Integer; n: Integer; k: Integer;
        var a: culaDouble; lda: Integer; var tau: culaDouble; var c: culaDouble; ldc: Integer): culaStatus; stdcall; external CULADLL name 'culaDormqr';
      function culaDormrq(side: char; trans: char; m: Integer; n: Integer; k: Integer;
        var a: culaDouble; lda: Integer; var tau: culaDouble; var c: culaDouble; ldc: Integer): culaStatus; stdcall; external CULADLL name 'culaDormrq';
      // Double-precision culaDp* imports (stdcall, bound to the same-named CULADLL exports).
      // Every routine takes a `uplo` flag by value and its matrix arguments by reference.
      // NOTE(review): names follow LAPACK xPBTRF / xPOSV / xPOTRF / xPOTRS (positive-definite
      // factorization and solve) - confirm against the CULA reference.
      function culaDpbtrf(uplo: char; n: Integer; kd: Integer; var ab: culaDouble;
        ldab: Integer): culaStatus; stdcall; external CULADLL name 'culaDpbtrf';
      function culaDposv(uplo: char; n: Integer; nrhs: Integer; var a: culaDouble;
        lda: Integer; var b: culaDouble; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaDposv';
      function culaDpotrf(uplo: char; n: Integer; var a: culaDouble; lda: Integer): culaStatus; stdcall; external CULADLL name 'culaDpotrf';
      function culaDpotrs(uplo: char; n: Integer; nrhs: Integer; var a: culaDouble;
        lda: Integer; var b: culaDouble; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaDpotrs';
      // culaDsgesv / culaDsposv imports (stdcall, bound to the same-named CULADLL exports).
      // Besides the input `a` and `b`, both take a separate `x` argument (with ldx) and an
      // `iter` integer, all by reference; culaDsgesv additionally takes `ipiv` by reference.
      // NOTE(review): names follow LAPACK DSGESV / DSPOSV (mixed-precision iterative-refinement
      // solvers; `iter` presumably reports the refinement outcome) - confirm against the
      // CULA reference.
      function culaDsgesv(n: Integer; nrhs: Integer; var a: culaDouble; lda: Integer;
        var ipiv: culaInt; var b: culaDouble; ldb: Integer; var x: culaDouble; ldx: Integer;
        var iter: Integer): culaStatus; stdcall; external CULADLL name 'culaDsgesv';
      function culaDsposv(uplo: char; n: Integer; nrhs: Integer; var a: culaDouble;
        lda: Integer; var b: culaDouble; ldb: Integer; var x: culaDouble; ldx: Integer;
        var iter: Integer): culaStatus; stdcall; external CULADLL name 'culaDsposv';
      // Double-precision culaDst* / culaDsy* imports (stdcall, same-named CULADLL exports).
      // Scalar bounds and tolerances (vl, vu, abstol) and index bounds (il, iu) are passed
      // by value; result counters (m, nsplit) and all array arguments are passed by reference.
      // Note that culaDstebz declares vl/vu/abstol as `double` and m as Integer, whereas
      // culaDsyevx declares them as culaDouble and culaInt.
      // NOTE(review): names follow LAPACK xSTEBZ / xSTEQR / xSYEV / xSYEVX (symmetric and
      // tridiagonal eigenvalue routines); culaDsyrdb has no LAPACK namesake in view -
      // confirm all semantics against the CULA reference.
      function culaDstebz(range: char; order: char; n: Integer; vl: double; vu: double;
        il: Integer; iu: Integer; abstol: double; var d: culaDouble; var e: culaDouble;
        var m: Integer; var nsplit: Integer; var w: culaDouble; var isplit: culaInt;
        var iblock: culaInt): culaStatus; stdcall; external CULADLL name 'culaDstebz';
      function culaDsteqr(compz: char; n: Integer; var d: culaDouble; var e: culaDouble;
        var z: culaDouble; ldz: Integer): culaStatus; stdcall; external CULADLL name 'culaDsteqr';
      function culaDsyev(jobz: char; uplo: char; n: Integer; var a: culaDouble; lda: Integer;
        var w: culaDouble): culaStatus; stdcall; external CULADLL name 'culaDsyev';
      function culaDsyevx(jobz: char; range: char; uplo: char; n: Integer; var a: culaDouble;
        lda: Integer; vl: culaDouble; vu: culaDouble; il: Integer; iu: Integer; abstol: culaDouble;
        var m: culaInt; var w: culaDouble; var z: culaDouble; ldz: Integer; var ifail: culaInt): culaStatus; stdcall; external CULADLL name 'culaDsyevx';
      function culaDsyrdb(jobz: char; uplo: char; n: Integer; kd: Integer; var a: culaDouble;
        lda: Integer; var d: culaDouble; var e: culaDouble; var tau: culaDouble; var z: culaDouble;
        ldz: Integer): culaStatus; stdcall; external CULADLL name 'culaDsyrdb';
      // Double-precision culaDtr* imports (stdcall, same-named CULADLL exports).
      // uplo/trans/diag flags and sizes are passed by value; `a` (and `b`) by reference.
      // NOTE(review): names follow LAPACK xTRTRI / xTRTRS (triangular inverse and
      // triangular solve) - confirm against the CULA reference.
      function culaDtrtri(uplo: char; diag: char; n: Integer; var a: culaDouble; lda: Integer): culaStatus; stdcall; external CULADLL name 'culaDtrtri';
      function culaDtrtrs(uplo: char; trans: char; diag: char; n: Integer; nrhs: Integer;
        var a: culaDouble; lda: Integer; var b: culaDouble; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaDtrtrs';
      // Start of the single-precision (culaFloat) imports in view; both are stdcall and
      // bound to the same-named CULADLL exports. Array arguments are passed by reference,
      // sizes and leading dimensions by value.
      // NOTE(review): names follow LAPACK xBDSQR (bidiagonal SVD) and xGBTRF (banded LU) -
      // confirm against the CULA reference.
      function culaSbdsqr(uplo: char; n: Integer; ncvt: Integer; nru: Integer; ncc: Integer;
        var d: culaFloat; var e: culaFloat; var vt: culaFloat; ldvt: Integer; var u: culaFloat;
        ldu: Integer; var c: culaFloat; ldc: Integer): culaStatus; stdcall; external CULADLL name 'culaSbdsqr';
      function culaSgbtrf(m: Integer; n: Integer; kl: Integer; ku: Integer; var a: culaFloat;
        lda: Integer; var ipiv: culaInt): culaStatus; stdcall; external CULADLL name 'culaSgbtrf';
      // Single-precision general-matrix helper imports (stdcall, same-named CULADLL exports).
      // These names have no LAPACK namesake.
      // NOTE(review): presumably NaN detection, out-of-place transpose (a -> b) and in-place
      // transpose of a square matrix; how a detected NaN is reported through culaStatus is
      // not visible here - confirm against the CULA reference.
      function culaSgeNancheck(m: Integer; n: Integer; var a: culaFloat; lda: Integer): culaStatus; stdcall; external CULADLL name 'culaSgeNancheck';
      function culaSgeTranspose(m: Integer; n: Integer; var a: culaFloat; lda: Integer;
        var b: culaFloat; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaSgeTranspose';
      function culaSgeTransposeInplace(n: Integer; var a: culaFloat; lda: Integer): culaStatus; stdcall; external CULADLL name 'culaSgeTransposeInplace';
      // Single-precision culaSge* imports, part 1 (stdcall, same-named CULADLL exports).
      // Matrix and vector arguments (a, b, d, e, tau, tauq, taup, wr, wi, vl, vr) are passed
      // by reference; sizes, leading dimensions and job/trans flags by value.
      // NOTE(review): names follow LAPACK xGEBRD, xGEEV, xGEHRD, xGELQF, xGELS, xGEQLF,
      // xGEQRF and xGERQF (reductions, eigenproblem, orthogonal factorizations, least
      // squares); culaSgeqrs has no standard LAPACK namesake - confirm against the
      // CULA reference.
      function culaSgebrd(m: Integer; n: Integer; var a: culaFloat; lda: Integer; var d: culaFloat;
        var e: culaFloat; var tauq: culaFloat; var taup: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSgebrd';
      function culaSgeev(jobvl: char; jobvr: char; n: Integer; var a: culaFloat; lda: Integer;
        var wr: culaFloat; var wi: culaFloat; var vl: culaFloat; ldvl: Integer; var vr: culaFloat;
        ldvr: Integer): culaStatus; stdcall; external CULADLL name 'culaSgeev';
      function culaSgehrd(n: Integer; ilo: Integer; ihi: Integer; var a: culaFloat;
        lda: Integer; var tau: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSgehrd';
      function culaSgelqf(m: Integer; n: Integer; var a: culaFloat; lda: Integer; var tau: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSgelqf';
      function culaSgels(trans: char; m: Integer; n: Integer; nrhs: Integer; var a: culaFloat;
        lda: Integer; var b: culaFloat; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaSgels';
      function culaSgeqlf(m: Integer; n: Integer; var a: culaFloat; lda: Integer; var tau: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSgeqlf';
      function culaSgeqrf(m: Integer; n: Integer; var a: culaFloat; lda: Integer; var tau: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSgeqrf';
      function culaSgeqrs(m: Integer; n: Integer; nrhs: Integer; var a: culaFloat;
        lda: Integer; var tau: culaFloat; var b: culaFloat; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaSgeqrs';
      function culaSgerqf(m: Integer; n: Integer; var a: culaFloat; lda: Integer; var tau: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSgerqf';
      // Single-precision culaSge* / culaSgg* imports, part 2 (stdcall, same-named CULADLL exports).
      // Pivot arrays are passed as `var ipiv: culaInt`; all other arrays as `var ...: culaFloat`.
      // NOTE(review): names follow LAPACK xGESV, xGESVD, xGETRF, xGETRI, xGETRS (LU solve,
      // SVD, LU factor / inverse / solve) and xGGLSE, xGGRQF (generalized least squares and
      // generalized RQ) - confirm against the CULA reference.
      function culaSgesv(n: Integer; nrhs: Integer; var a: culaFloat; lda: Integer;
        var ipiv: culaInt; var b: culaFloat; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaSgesv';
      function culaSgesvd(jobu: char; jobvt: char; m: Integer; n: Integer; var a: culaFloat;
        lda: Integer; var s: culaFloat; var u: culaFloat; ldu: Integer; var vt: culaFloat;
        ldvt: Integer): culaStatus; stdcall; external CULADLL name 'culaSgesvd';
      function culaSgetrf(m: Integer; n: Integer; var a: culaFloat; lda: Integer; var ipiv: culaInt): culaStatus; stdcall; external CULADLL name 'culaSgetrf';
      function culaSgetri(n: Integer; var a: culaFloat; lda: Integer; var ipiv: culaInt): culaStatus; stdcall; external CULADLL name 'culaSgetri';
      function culaSgetrs(trans: char; n: Integer; nrhs: Integer; var a: culaFloat;
        lda: Integer; var ipiv: culaInt; var b: culaFloat; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaSgetrs';
      function culaSgglse(m: Integer; n: Integer; p: Integer; var a: culaFloat; lda: Integer;
        var b: culaFloat; ldb: Integer; var c: culaFloat; var d: culaFloat; var x: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSgglse';
      function culaSggrqf(m: Integer; p: Integer; n: Integer; var a: culaFloat; lda: Integer;
        var taua: culaFloat; var b: culaFloat; ldb: Integer; var taub: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSggrqf';
      // Single-precision culaSla* auxiliary imports, part 1 (stdcall, same-named CULADLL exports).
      // culaSlag2d is the only routine here that mixes precisions: `a` is culaFloat and
      // `sa` is culaDouble. In culaSlarfg, `alpha` is passed by reference (not by value).
      // NOTE(review): names follow LAPACK xLACPY (copy), xLAG2D (single -> double),
      // xLAR2V / xLARGV (plane rotations), xLARFB / xLARFG (Householder reflectors) -
      // confirm against the CULA reference.
      function culaSlacpy(uplo: char; m: Integer; n: Integer; var a: culaFloat; lda: Integer;
        var b: culaFloat; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaSlacpy';
      function culaSlag2d(m: Integer; n: Integer; var a: culaFloat; lda: Integer; var sa: culaDouble;
        ldsa: Integer): culaStatus; stdcall; external CULADLL name 'culaSlag2d';
      function culaSlar2v(n: Integer; var x: culaFloat; var y: culaFloat; var z: culaFloat;
        incx: Integer; var c: culaFloat; var s: culaFloat; incc: Integer): culaStatus; stdcall; external CULADLL name 'culaSlar2v';
      function culaSlarfb(side: char; trans: char; direct: char; storev: char; m: Integer;
        n: Integer; k: Integer; var v: culaFloat; ldv: Integer; var t: culaFloat; ldt: Integer;
        var c: culaFloat; ldc: Integer): culaStatus; stdcall; external CULADLL name 'culaSlarfb';
      function culaSlarfg(n: Integer; var alpha: culaFloat; var x: culaFloat; incx: Integer;
        var tau: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSlarfg';
      function culaSlargv(n: Integer; var x: culaFloat; incx: Integer; var y: culaFloat;
        incy: Integer; var c: culaFloat; incc: Integer): culaStatus; stdcall; external CULADLL name 'culaSlargv';
      function culaSlartv(n: Integer; var x: culaFloat; incx: Integer; var y: culaFloat;
        incy: Integer; var c: culaFloat; var s: culaFloat; incc: Integer): culaStatus; stdcall; external CULADLL name 'culaSlart';
      // Single-precision culaSla* auxiliary imports, part 2 (stdcall, same-named CULADLL exports).
      // Scalars cfrom/cto (culaSlascl) and alpha/beta (culaSlaset) are passed by value as
      // culaFloat; matrices and rotation vectors are passed by reference.
      // The first parameter of culaSlascl is spelled `typee` because `type` is a reserved
      // word in Pascal.
      // NOTE(review): names follow LAPACK xLASCL (scale), xLASET (initialize), xLASR (apply
      // rotations) and xLAT2D (single -> double, triangular) - confirm against the CULA reference.
      function culaSlascl(typee: char; kl: Integer; ku: Integer; cfrom: culaFloat; cto: culaFloat;
        m: Integer; n: Integer; var a: culaFloat; lda: Integer): culaStatus; stdcall; external CULADLL name 'culaSlascl';
      function culaSlaset(uplo: char; m: Integer; n: Integer; alpha: culaFloat; beta: culaFloat;
        var a: culaFloat; lda: Integer): culaStatus; stdcall; external CULADLL name 'culaSlaset';
      function culaSlasr(side: char; pivot: char; direct: char; m: Integer; n: Integer;
        var c: culaFloat; var s: culaFloat; var a: culaFloat; lda: Integer): culaStatus; stdcall; external CULADLL name 'culaSlasr';
      function culaSlat2d(uplo: char; n: Integer; var a: culaFloat; lda: Integer; var sa: culaDouble;
        ldsa: Integer): culaStatus; stdcall; external CULADLL name 'culaSlat2d';
      // Single-precision culaSorg* imports (stdcall, same-named CULADLL exports).
      // Signatures mirror the culaDorg* declarations with culaFloat in place of culaDouble:
      // integer dimensions by value, `a` and `tau` by reference.
      // NOTE(review): names follow the LAPACK xORGxx family (forming an orthogonal matrix
      // from a previous factorization) - confirm against the CULA reference.
      function culaSorgbr(vect: char; m: Integer; n: Integer; k: Integer; var a: culaFloat;
        lda: Integer; var tau: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSorgbr';
      function culaSorghr(n: Integer; ilo: Integer; ihi: Integer; var a: culaFloat;
        lda: Integer; var tau: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSorghr';
      function culaSorglq(m: Integer; n: Integer; k: Integer; var a: culaFloat; lda: Integer;
        var tau: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSorglq';
      function culaSorgql(m: Integer; n: Integer; k: Integer; var a: culaFloat; lda: Integer;
        var tau: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSorgql';
      function culaSorgqr(m: Integer; n: Integer; k: Integer; var a: culaFloat; lda: Integer;
        var tau: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSorgqr';
      function culaSorgrq(m: Integer; n: Integer; k: Integer; var a: culaFloat; lda: Integer;
        var tau: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSorgrq';
      // Single-precision culaSorm* imports (stdcall, same-named CULADLL exports).
      // All four share one signature: side/trans flags and m, n, k by value; `a`, `tau`
      // and `c` by reference with leading dimensions lda and ldc by value.
      // NOTE(review): names follow the LAPACK xORMxx family (applying an orthogonal factor
      // to a matrix C) - confirm against the CULA reference.
      function culaSormlq(side: char; trans: char; m: Integer; n: Integer; k: Integer;
        var a: culaFloat; lda: Integer; var tau: culaFloat; var c: culaFloat; ldc: Integer): culaStatus; stdcall; external CULADLL name 'culaSormlq';
      function culaSormql(side: char; trans: char; m: Integer; n: Integer; k: Integer;
        var a: culaFloat; lda: Integer; var tau: culaFloat; var c: culaFloat; ldc: Integer): culaStatus; stdcall; external CULADLL name 'culaSormql';
      function culaSormqr(side: char; trans: char; m: Integer; n: Integer; k: Integer;
        var a: culaFloat; lda: Integer; var tau: culaFloat; var c: culaFloat; ldc: Integer): culaStatus; stdcall; external CULADLL name 'culaSormqr';
      function culaSormrq(side: char; trans: char; m: Integer; n: Integer; k: Integer;
        var a: culaFloat; lda: Integer; var tau: culaFloat; var c: culaFloat; ldc: Integer): culaStatus; stdcall; external CULADLL name 'culaSormrq';
      // Single-precision culaSp* imports (stdcall, same-named CULADLL exports).
      // Every routine takes a `uplo` flag by value and its matrix arguments by reference.
      // NOTE(review): names follow LAPACK xPBTRF / xPOSV / xPOTRF / xPOTRS (positive-definite
      // factorization and solve) - confirm against the CULA reference.
      function culaSpbtrf(uplo: char; n: Integer; kd: Integer; var ab: culaFloat; ldab: Integer): culaStatus; stdcall; external CULADLL name 'culaSpbtrf';
      function culaSposv(uplo: char; n: Integer; nrhs: Integer; var a: culaFloat; lda: Integer;
        var b: culaFloat; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaSposv';
      function culaSpotrf(uplo: char; n: Integer; var a: culaFloat; lda: Integer): culaStatus; stdcall; external CULADLL name 'culaSpotrf';
      function culaSpotrs(uplo: char; n: Integer; nrhs: Integer; var a: culaFloat;
        lda: Integer; var b: culaFloat; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaSpotrs';
      // Single-precision culaSst* / culaSsy* imports (stdcall, same-named CULADLL exports).
      // Scalar bounds and tolerances (vl, vu, abstol) and index bounds (il, iu) are passed
      // by value; result counters (m, nsplit) and all array arguments are passed by reference.
      // Note that culaSstebz declares vl/vu/abstol as `Single` and m as Integer, whereas
      // culaSsyevx declares them as culaFloat and culaInt.
      // NOTE(review): names follow LAPACK xSTEBZ / xSTEQR / xSYEV / xSYEVX (symmetric and
      // tridiagonal eigenvalue routines); culaSsyrdb has no LAPACK namesake in view -
      // confirm all semantics against the CULA reference.
      function culaSstebz(range: char; order: char; n: Integer; vl: Single; vu: Single;
        il: Integer; iu: Integer; abstol: Single; var d: culaFloat; var e: culaFloat;
        var m: Integer; var nsplit: Integer; var w: culaFloat; var isplit: culaInt; var iblock: culaInt): culaStatus; stdcall; external CULADLL name 'culaSstebz';
      function culaSsteqr(compz: char; n: Integer; var d: culaFloat; var e: culaFloat;
        var z: culaFloat; ldz: Integer): culaStatus; stdcall; external CULADLL name 'culaSsteqr';
      function culaSsyev(jobz: char; uplo: char; n: Integer; var a: culaFloat; lda: Integer;
        var w: culaFloat): culaStatus; stdcall; external CULADLL name 'culaSsyev';
      function culaSsyevx(jobz: char; range: char; uplo: char; n: Integer; var a: culaFloat;
        lda: Integer; vl: culaFloat; vu: culaFloat; il: Integer; iu: Integer; abstol: culaFloat;
        var m: culaInt; var w: culaFloat; var z: culaFloat; ldz: Integer; var ifail: culaInt): culaStatus; stdcall; external CULADLL name 'culaSsyevx';
      function culaSsyrdb(jobz: char; uplo: char; n: Integer; kd: Integer; var a: culaFloat;
        lda: Integer; var d: culaFloat; var e: culaFloat; var tau: culaFloat; var z: culaFloat;
        ldz: Integer): culaStatus; stdcall; external CULADLL name 'culaSsyrdb';
      // Single-precision culaStr* imports (stdcall, same-named CULADLL exports).
      // uplo/trans/diag flags and sizes are passed by value; `a` (and `b`) by reference.
      // NOTE(review): names follow LAPACK xTRTRI / xTRTRS (triangular inverse and
      // triangular solve) - confirm against the CULA reference.
      function culaStrtri(uplo: char; diag: char; n: Integer; var a: culaFloat; lda: Integer): culaStatus; stdcall; external CULADLL name 'culaStrtri';
      function culaStrtrs(uplo: char; trans: char; diag: char; n: Integer; nrhs: Integer;
        var a: culaFloat; lda: Integer; var b: culaFloat; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaStrtrs';
   
   
    (*  culablas.h functions: single- and double-precision variants only  *)
    // Double-precision BLAS-style imports (stdcall, bound to the same-named CULADLL exports).
    // Scalars alpha and beta are passed by value as culaDouble; matrices and vectors
    // (a, b, c, x, y) are passed by reference; flags, sizes, leading dimensions and
    // increments are passed by value.
    // NOTE(review): names follow BLAS xGEMM, xGEMV, xSYMM, xSYMV, xSYR2K, xSYRK, xTRMM and
    // xTRSM - confirm operand conventions against the CULA reference.
    function culaDgemm(transa: char; transb: char; m: integer; n: integer; k: integer; alpha: culaDouble; var a: culaDouble; lda: integer; var b: culaDouble; ldb: integer; beta: culaDouble; var c: culaDouble; ldc: integer): culaStatus; stdcall; external CULADLL name 'culaDgemm';
    function culaDgemv(trans: char; m: integer; n: integer; alpha: culaDouble; var a: culaDouble; lda: integer; var x: culaDouble; incx: integer; beta: culaDouble; var y: culaDouble; incy: integer): culaStatus; stdcall; external CULADLL name 'culaDgemv';
    function culaDsymm(side: char; uplo: char; m: integer; n: integer; alpha: culaDouble; var a: culaDouble; lda: integer; var b: culaDouble; ldb: integer; beta: culaDouble; var c: culaDouble; ldc: integer): culaStatus; stdcall; external CULADLL name 'culaDsymm';
    function culaDsymv(uplo: char; n: integer; alpha: culaDouble; var a: culaDouble; lda: integer; var x: culaDouble; incx: integer; beta: culaDouble; var y: culaDouble; incy: integer): culaStatus; stdcall; external CULADLL name 'culaDsymv';
    function culaDsyr2k(uplo: char; trans: char; n: integer; k: integer; alpha: culaDouble; var a: culaDouble; lda: integer; var b: culaDouble; ldb: integer; beta: culaDouble; var c: culaDouble; ldc: integer): culaStatus; stdcall; external CULADLL name 'culaDsyr2k';
    function culaDsyrk(uplo: char; trans: char; n: integer; k: integer; alpha: culaDouble; var a: culaDouble; lda: integer; beta: culaDouble; var c: culaDouble; ldc: integer): culaStatus; stdcall; external CULADLL name 'culaDsyrk';
    function culaDtrmm(side: char; uplo: char; transa: char; diag: char; m: integer; n: integer; alpha: culaDouble; var a: culaDouble; lda: integer; var b: culaDouble; ldb: integer): culaStatus; stdcall; external CULADLL name 'culaDtrmm';
    function culaDtrsm(side: char; uplo: char; transa: char; diag: char; m: integer; n: integer; alpha: culaDouble; var a: culaDouble; lda: integer; var b: culaDouble; ldb: integer): culaStatus; stdcall; external CULADLL name 'culaDtrsm';
    // Single-precision BLAS-style imports (stdcall, bound to the same-named CULADLL exports).
    // Scalars alpha and beta are passed by value as culaFloat; matrices and vectors
    // (a, b, c, x, y) are passed by reference; flags, sizes, leading dimensions and
    // increments are passed by value.
    // NOTE(review): names follow BLAS xGEMM, xGEMV, xSYMM, xSYMV, xSYR2K, xSYRK, xTRMM and
    // xTRSM - confirm operand conventions against the CULA reference.
    function culaSgemm(transa: char; transb: char; m: integer; n: integer; k: integer; alpha: culaFloat; var a: culaFloat; lda: integer; var b: culaFloat; ldb: integer; beta: culaFloat; var c: culaFloat; ldc: integer): culaStatus; stdcall; external CULADLL name 'culaSgemm';
    function culaSgemv(trans: char; m: integer; n: integer; alpha: culaFloat; var a: culaFloat; lda: integer; var x: culaFloat; incx: integer; beta: culaFloat; var y: culaFloat; incy: integer): culaStatus; stdcall; external CULADLL name 'culaSgemv';
    function culaSsymm(side: char; uplo: char; m: integer; n: integer; alpha: culaFloat; var a: culaFloat; lda: integer; var b: culaFloat; ldb: integer; beta: culaFloat; var c: culaFloat; ldc: integer): culaStatus; stdcall; external CULADLL name 'culaSsymm';
    function culaSsymv(uplo: char; n: integer; alpha: culaFloat; var a: culaFloat; lda: integer; var x: culaFloat; incx: integer; beta: culaFloat; var y: culaFloat; incy: integer): culaStatus; stdcall; external CULADLL name 'culaSsymv';
    function culaSsyr2k(uplo: char; trans: char; n: integer; k: integer; alpha: culaFloat; var a: culaFloat; lda: integer; var b: culaFloat; ldb: integer; beta: culaFloat; var c: culaFloat; ldc: integer): culaStatus; stdcall; external CULADLL name 'culaSsyr2k';
    function culaSsyrk(uplo: char; trans: char; n: integer; k: integer; alpha: culaFloat; var a: culaFloat; lda: integer; beta: culaFloat; var c: culaFloat; ldc: integer): culaStatus; stdcall; external CULADLL name 'culaSsyrk';
    function culaStrmm(side: char; uplo: char; transa: char; diag: char; m: integer; n: integer; alpha: culaFloat; var a: culaFloat; lda: integer; var b: culaFloat; ldb: integer): culaStatus; stdcall; external CULADLL name 'culaStrmm';
    function culaStrsm(side: char; uplo: char; transa: char; diag: char; m: integer; n: integer; alpha: culaFloat; var a: culaFloat; lda: integer; var b: culaFloat; ldb: integer): culaStatus; stdcall; external CULADLL name 'culaStrsm';
   
   
    // Import of the 'culaDeviceDgesv' export from CULADLL (stdcall). The parameter list is
    // the double-precision counterpart of the culaSgesv signature declared in this unit.
    // NOTE(review): the "Device" prefix suggests the arrays are expected to live in GPU
    // memory. Because a, ipiv and b are declared as `var` parameters, a caller holding a
    // raw device address would have to pass it as a dereferenced typed pointer - confirm
    // the expected calling pattern against the CULA device-interface documentation.
    function culaDeviceDgesv(n: Integer; nrhs: Integer; var a: culaDouble; lda: Integer;
        var ipiv: culaInt; var b: culaDouble; ldb: Integer): culaStatus; stdcall; external CULADLL name 'culaDeviceDgesv';
   
   
    implementation
   
    function IDX2(i, j, rank: integer): integer;
    begin
     result:= j*rank+i;
    end;
   
    // Unit start-up code: runs once when the unit is initialized.
    // Calls culaInitialize and stores the returned status in statuscula; the status is
    // not inspected here, so callers must check statuscula themselves.
    initialization
    begin
      // Compiler directive: switches range checking off from this point to the end of
      // the unit. NOTE(review): placed here it only covers the initialization and
      // finalization sections; if it was meant to cover the whole unit it belongs
      // near the top of the file.
      {$R-}
      statuscula:= culaInitialize;
    end;
   
    // Unit tear-down code: runs once when the unit is finalized.
    // Calls culaFreeBuffers and then culaShutdown, in that order and unconditionally -
    // statuscula is not consulted, so both calls are made even if culaInitialize
    // reported a failure. Their return values (if any) are discarded.
    finalization
    begin
      culaFreeBuffers;
      culaShutdown;
    end;
end.
gustavo
CULA Premium
 
Posts: 5
Joined: Fri Jun 17, 2011 7:03 pm

Re: CULA and CuBlas with Delphi

Postby gustavo » Thu Aug 25, 2011 5:53 am

gustavo
CULA Premium
 
Posts: 5
Joined: Fri Jun 17, 2011 7:03 pm

Re: CULA and CuBlas with Delphi

Postby john » Thu Aug 25, 2011 11:09 am

Quite interesting, and glad to see you got it working well!
john
Administrator
 
Posts: 587
Joined: Thu Jul 23, 2009 2:31 pm


Return to CULA Dense Support

Who is online

Users browsing this forum: No registered users and 3 guests

cron