Merge pull request #1869 from Gustav-Simonsson/gpu_miner
all: Add GPU mining, disabled by default
This commit is contained in:
commit
d5327ddc5f
|
@ -16,8 +16,8 @@
|
|||
},
|
||||
{
|
||||
"ImportPath": "github.com/ethereum/ethash",
|
||||
"Comment": "v23.1-234-g062e40a",
|
||||
"Rev": "062e40a1a1671f5a5102862b56e4c56f68a732f5"
|
||||
"Comment": "v23.1-235-gb39e007",
|
||||
"Rev": "b39e007d393ab5945b4c0748a7415b7e31c5db04"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/fatih/color",
|
||||
|
|
26
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl.go
generated
vendored
Normal file
26
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl.go
generated
vendored
Normal file
|
@ -0,0 +1,26 @@
|
|||
/*
|
||||
Package cl provides a binding to the OpenCL api. It's mostly a low-level
|
||||
wrapper that avoids adding functionality while still making the interface
|
||||
a little more friendly and easy to use.
|
||||
|
||||
Resource life-cycle management:
|
||||
|
||||
For any CL object that gets created (buffer, queue, kernel, etc..) you should
|
||||
call object.Release() when finished with it to free the CL resources. This
|
||||
explicitly calls the related clXXXRelease method for the type. However,
|
||||
as a fallback there is a finalizer set for every resource item that takes
|
||||
care of it (eventually) if Release isn't called. In this way you can have
|
||||
better control over the life cycle of resources while having a fall back
|
||||
to avoid leaks. This is similar to how file handles and such are handled
|
||||
in the Go standard packages.
|
||||
*/
|
||||
package cl
|
||||
|
||||
// #include "headers/1.2/opencl.h"
|
||||
// #cgo CFLAGS: -Iheaders/1.2
|
||||
// #cgo darwin LDFLAGS: -framework OpenCL
|
||||
// #cgo linux LDFLAGS: -lOpenCL
|
||||
import "C"
|
||||
import "errors"
|
||||
|
||||
// ErrUnsupported is the sentinel error carrying the message "cl: unsupported".
// The package's own test compares the error from Kernel.ArgName against it,
// so callers can detect it with a plain equality check.
var ErrUnsupported = errors.New("cl: unsupported")
|
254
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl_test.go
generated
vendored
Normal file
254
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl_test.go
generated
vendored
Normal file
|
@ -0,0 +1,254 @@
|
|||
package cl
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// kernelSource is the OpenCL C program built by TestHello. Its single kernel,
// "square", writes input[i]*input[i] to output[i] for every work-item whose
// global id is below count.
var kernelSource = `
__kernel void square(
__global float* input,
__global float* output,
const unsigned int count)
{
int i = get_global_id(0);
if(i < count)
output[i] = input[i] * input[i];
}
`
|
||||
|
||||
// getObjectStrings calls every exported method of object that takes no
// arguments (other than the receiver) and returns exactly one string-kind
// value, and returns the results keyed by method name.
func getObjectStrings(object interface{}) map[string]string {
	objType := reflect.TypeOf(object)
	objValue := reflect.ValueOf(object)

	result := map[string]string{}
	for idx, total := 0, objType.NumMethod(); idx < total; idx++ {
		m := objType.Method(idx)
		sig := m.Type

		// A method obtained from a reflect.Type counts its receiver as the
		// first input, so NumIn() == 1 means "no explicit parameters".
		if sig.NumIn() != 1 || sig.NumOut() != 1 {
			continue
		}
		if sig.Out(0).Kind() != reflect.String {
			continue
		}

		ret := objValue.Method(idx).Call(nil)
		result[m.Name] = ret[0].Interface().(string)
	}
	return result
}
|
||||
|
||||
func TestPlatformStringsContainNoNULs(t *testing.T) {
|
||||
platforms, err := GetPlatforms()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to get platforms: %+v", err)
|
||||
}
|
||||
|
||||
for _, p := range platforms {
|
||||
for key, value := range getObjectStrings(p) {
|
||||
if strings.Contains(value, "\x00") {
|
||||
t.Fatalf("platform string %q = %+q contains NUL", key, value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeviceStringsContainNoNULs(t *testing.T) {
|
||||
platforms, err := GetPlatforms()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to get platforms: %+v", err)
|
||||
}
|
||||
|
||||
for _, p := range platforms {
|
||||
devs, err := p.GetDevices(DeviceTypeAll)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to get devices for platform %q: %+v", p.Name(), err)
|
||||
}
|
||||
|
||||
for _, d := range devs {
|
||||
for key, value := range getObjectStrings(d) {
|
||||
if strings.Contains(value, "\x00") {
|
||||
t.Fatalf("device string %q = %+q contains NUL", key, value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHello(t *testing.T) {
|
||||
var data [1024]float32
|
||||
for i := 0; i < len(data); i++ {
|
||||
data[i] = rand.Float32()
|
||||
}
|
||||
|
||||
platforms, err := GetPlatforms()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to get platforms: %+v", err)
|
||||
}
|
||||
for i, p := range platforms {
|
||||
t.Logf("Platform %d:", i)
|
||||
t.Logf(" Name: %s", p.Name())
|
||||
t.Logf(" Vendor: %s", p.Vendor())
|
||||
t.Logf(" Profile: %s", p.Profile())
|
||||
t.Logf(" Version: %s", p.Version())
|
||||
t.Logf(" Extensions: %s", p.Extensions())
|
||||
}
|
||||
platform := platforms[0]
|
||||
|
||||
devices, err := platform.GetDevices(DeviceTypeAll)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to get devices: %+v", err)
|
||||
}
|
||||
if len(devices) == 0 {
|
||||
t.Fatalf("GetDevices returned no devices")
|
||||
}
|
||||
deviceIndex := -1
|
||||
for i, d := range devices {
|
||||
if deviceIndex < 0 && d.Type() == DeviceTypeGPU {
|
||||
deviceIndex = i
|
||||
}
|
||||
t.Logf("Device %d (%s): %s", i, d.Type(), d.Name())
|
||||
t.Logf(" Address Bits: %d", d.AddressBits())
|
||||
t.Logf(" Available: %+v", d.Available())
|
||||
// t.Logf(" Built-In Kernels: %s", d.BuiltInKernels())
|
||||
t.Logf(" Compiler Available: %+v", d.CompilerAvailable())
|
||||
t.Logf(" Double FP Config: %s", d.DoubleFPConfig())
|
||||
t.Logf(" Driver Version: %s", d.DriverVersion())
|
||||
t.Logf(" Error Correction Supported: %+v", d.ErrorCorrectionSupport())
|
||||
t.Logf(" Execution Capabilities: %s", d.ExecutionCapabilities())
|
||||
t.Logf(" Extensions: %s", d.Extensions())
|
||||
t.Logf(" Global Memory Cache Type: %s", d.GlobalMemCacheType())
|
||||
t.Logf(" Global Memory Cacheline Size: %d KB", d.GlobalMemCachelineSize()/1024)
|
||||
t.Logf(" Global Memory Size: %d MB", d.GlobalMemSize()/(1024*1024))
|
||||
t.Logf(" Half FP Config: %s", d.HalfFPConfig())
|
||||
t.Logf(" Host Unified Memory: %+v", d.HostUnifiedMemory())
|
||||
t.Logf(" Image Support: %+v", d.ImageSupport())
|
||||
t.Logf(" Image2D Max Dimensions: %d x %d", d.Image2DMaxWidth(), d.Image2DMaxHeight())
|
||||
t.Logf(" Image3D Max Dimenionns: %d x %d x %d", d.Image3DMaxWidth(), d.Image3DMaxHeight(), d.Image3DMaxDepth())
|
||||
// t.Logf(" Image Max Buffer Size: %d", d.ImageMaxBufferSize())
|
||||
// t.Logf(" Image Max Array Size: %d", d.ImageMaxArraySize())
|
||||
// t.Logf(" Linker Available: %+v", d.LinkerAvailable())
|
||||
t.Logf(" Little Endian: %+v", d.EndianLittle())
|
||||
t.Logf(" Local Mem Size Size: %d KB", d.LocalMemSize()/1024)
|
||||
t.Logf(" Local Mem Type: %s", d.LocalMemType())
|
||||
t.Logf(" Max Clock Frequency: %d", d.MaxClockFrequency())
|
||||
t.Logf(" Max Compute Units: %d", d.MaxComputeUnits())
|
||||
t.Logf(" Max Constant Args: %d", d.MaxConstantArgs())
|
||||
t.Logf(" Max Constant Buffer Size: %d KB", d.MaxConstantBufferSize()/1024)
|
||||
t.Logf(" Max Mem Alloc Size: %d KB", d.MaxMemAllocSize()/1024)
|
||||
t.Logf(" Max Parameter Size: %d", d.MaxParameterSize())
|
||||
t.Logf(" Max Read-Image Args: %d", d.MaxReadImageArgs())
|
||||
t.Logf(" Max Samplers: %d", d.MaxSamplers())
|
||||
t.Logf(" Max Work Group Size: %d", d.MaxWorkGroupSize())
|
||||
t.Logf(" Max Work Item Dimensions: %d", d.MaxWorkItemDimensions())
|
||||
t.Logf(" Max Work Item Sizes: %d", d.MaxWorkItemSizes())
|
||||
t.Logf(" Max Write-Image Args: %d", d.MaxWriteImageArgs())
|
||||
t.Logf(" Memory Base Address Alignment: %d", d.MemBaseAddrAlign())
|
||||
t.Logf(" Native Vector Width Char: %d", d.NativeVectorWidthChar())
|
||||
t.Logf(" Native Vector Width Short: %d", d.NativeVectorWidthShort())
|
||||
t.Logf(" Native Vector Width Int: %d", d.NativeVectorWidthInt())
|
||||
t.Logf(" Native Vector Width Long: %d", d.NativeVectorWidthLong())
|
||||
t.Logf(" Native Vector Width Float: %d", d.NativeVectorWidthFloat())
|
||||
t.Logf(" Native Vector Width Double: %d", d.NativeVectorWidthDouble())
|
||||
t.Logf(" Native Vector Width Half: %d", d.NativeVectorWidthHalf())
|
||||
t.Logf(" OpenCL C Version: %s", d.OpenCLCVersion())
|
||||
// t.Logf(" Parent Device: %+v", d.ParentDevice())
|
||||
t.Logf(" Profile: %s", d.Profile())
|
||||
t.Logf(" Profiling Timer Resolution: %d", d.ProfilingTimerResolution())
|
||||
t.Logf(" Vendor: %s", d.Vendor())
|
||||
t.Logf(" Version: %s", d.Version())
|
||||
}
|
||||
if deviceIndex < 0 {
|
||||
deviceIndex = 0
|
||||
}
|
||||
device := devices[deviceIndex]
|
||||
t.Logf("Using device %d", deviceIndex)
|
||||
context, err := CreateContext([]*Device{device})
|
||||
if err != nil {
|
||||
t.Fatalf("CreateContext failed: %+v", err)
|
||||
}
|
||||
// imageFormats, err := context.GetSupportedImageFormats(0, MemObjectTypeImage2D)
|
||||
// if err != nil {
|
||||
// t.Fatalf("GetSupportedImageFormats failed: %+v", err)
|
||||
// }
|
||||
// t.Logf("Supported image formats: %+v", imageFormats)
|
||||
queue, err := context.CreateCommandQueue(device, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("CreateCommandQueue failed: %+v", err)
|
||||
}
|
||||
program, err := context.CreateProgramWithSource([]string{kernelSource})
|
||||
if err != nil {
|
||||
t.Fatalf("CreateProgramWithSource failed: %+v", err)
|
||||
}
|
||||
if err := program.BuildProgram(nil, ""); err != nil {
|
||||
t.Fatalf("BuildProgram failed: %+v", err)
|
||||
}
|
||||
kernel, err := program.CreateKernel("square")
|
||||
if err != nil {
|
||||
t.Fatalf("CreateKernel failed: %+v", err)
|
||||
}
|
||||
for i := 0; i < 3; i++ {
|
||||
name, err := kernel.ArgName(i)
|
||||
if err == ErrUnsupported {
|
||||
break
|
||||
} else if err != nil {
|
||||
t.Errorf("GetKernelArgInfo for name failed: %+v", err)
|
||||
break
|
||||
} else {
|
||||
t.Logf("Kernel arg %d: %s", i, name)
|
||||
}
|
||||
}
|
||||
input, err := context.CreateEmptyBuffer(MemReadOnly, 4*len(data))
|
||||
if err != nil {
|
||||
t.Fatalf("CreateBuffer failed for input: %+v", err)
|
||||
}
|
||||
output, err := context.CreateEmptyBuffer(MemReadOnly, 4*len(data))
|
||||
if err != nil {
|
||||
t.Fatalf("CreateBuffer failed for output: %+v", err)
|
||||
}
|
||||
if _, err := queue.EnqueueWriteBufferFloat32(input, true, 0, data[:], nil); err != nil {
|
||||
t.Fatalf("EnqueueWriteBufferFloat32 failed: %+v", err)
|
||||
}
|
||||
if err := kernel.SetArgs(input, output, uint32(len(data))); err != nil {
|
||||
t.Fatalf("SetKernelArgs failed: %+v", err)
|
||||
}
|
||||
|
||||
local, err := kernel.WorkGroupSize(device)
|
||||
if err != nil {
|
||||
t.Fatalf("WorkGroupSize failed: %+v", err)
|
||||
}
|
||||
t.Logf("Work group size: %d", local)
|
||||
size, _ := kernel.PreferredWorkGroupSizeMultiple(nil)
|
||||
t.Logf("Preferred Work Group Size Multiple: %d", size)
|
||||
|
||||
global := len(data)
|
||||
d := len(data) % local
|
||||
if d != 0 {
|
||||
global += local - d
|
||||
}
|
||||
if _, err := queue.EnqueueNDRangeKernel(kernel, nil, []int{global}, []int{local}, nil); err != nil {
|
||||
t.Fatalf("EnqueueNDRangeKernel failed: %+v", err)
|
||||
}
|
||||
|
||||
if err := queue.Finish(); err != nil {
|
||||
t.Fatalf("Finish failed: %+v", err)
|
||||
}
|
||||
|
||||
results := make([]float32, len(data))
|
||||
if _, err := queue.EnqueueReadBufferFloat32(output, true, 0, results, nil); err != nil {
|
||||
t.Fatalf("EnqueueReadBufferFloat32 failed: %+v", err)
|
||||
}
|
||||
|
||||
correct := 0
|
||||
for i, v := range data {
|
||||
if results[i] == v*v {
|
||||
correct++
|
||||
}
|
||||
}
|
||||
|
||||
if correct != len(data) {
|
||||
t.Fatalf("%d/%d correct values", correct, len(data))
|
||||
}
|
||||
}
|
161
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/context.go
generated
vendored
Normal file
161
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/context.go
generated
vendored
Normal file
|
@ -0,0 +1,161 @@
|
|||
package cl
|
||||
|
||||
// #include <stdlib.h>
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// maxImageFormats is the capacity of the fixed-size array that
// GetSupportedImageFormats hands to clGetSupportedImageFormats.
const maxImageFormats = 256
|
||||
|
||||
type Context struct {
|
||||
clContext C.cl_context
|
||||
devices []*Device
|
||||
}
|
||||
|
||||
type MemObject struct {
|
||||
clMem C.cl_mem
|
||||
size int
|
||||
}
|
||||
|
||||
func releaseContext(c *Context) {
|
||||
if c.clContext != nil {
|
||||
C.clReleaseContext(c.clContext)
|
||||
c.clContext = nil
|
||||
}
|
||||
}
|
||||
|
||||
func releaseMemObject(b *MemObject) {
|
||||
if b.clMem != nil {
|
||||
C.clReleaseMemObject(b.clMem)
|
||||
b.clMem = nil
|
||||
}
|
||||
}
|
||||
|
||||
func newMemObject(mo C.cl_mem, size int) *MemObject {
|
||||
memObject := &MemObject{clMem: mo, size: size}
|
||||
runtime.SetFinalizer(memObject, releaseMemObject)
|
||||
return memObject
|
||||
}
|
||||
|
||||
func (b *MemObject) Release() {
|
||||
releaseMemObject(b)
|
||||
}
|
||||
|
||||
// TODO: properties
|
||||
func CreateContext(devices []*Device) (*Context, error) {
|
||||
deviceIds := buildDeviceIdList(devices)
|
||||
var err C.cl_int
|
||||
clContext := C.clCreateContext(nil, C.cl_uint(len(devices)), &deviceIds[0], nil, nil, &err)
|
||||
if err != C.CL_SUCCESS {
|
||||
return nil, toError(err)
|
||||
}
|
||||
if clContext == nil {
|
||||
return nil, ErrUnknown
|
||||
}
|
||||
context := &Context{clContext: clContext, devices: devices}
|
||||
runtime.SetFinalizer(context, releaseContext)
|
||||
return context, nil
|
||||
}
|
||||
|
||||
func (ctx *Context) GetSupportedImageFormats(flags MemFlag, imageType MemObjectType) ([]ImageFormat, error) {
|
||||
var formats [maxImageFormats]C.cl_image_format
|
||||
var nFormats C.cl_uint
|
||||
if err := C.clGetSupportedImageFormats(ctx.clContext, C.cl_mem_flags(flags), C.cl_mem_object_type(imageType), maxImageFormats, &formats[0], &nFormats); err != C.CL_SUCCESS {
|
||||
return nil, toError(err)
|
||||
}
|
||||
fmts := make([]ImageFormat, nFormats)
|
||||
for i, f := range formats[:nFormats] {
|
||||
fmts[i] = ImageFormat{
|
||||
ChannelOrder: ChannelOrder(f.image_channel_order),
|
||||
ChannelDataType: ChannelDataType(f.image_channel_data_type),
|
||||
}
|
||||
}
|
||||
return fmts, nil
|
||||
}
|
||||
|
||||
func (ctx *Context) CreateCommandQueue(device *Device, properties CommandQueueProperty) (*CommandQueue, error) {
|
||||
var err C.cl_int
|
||||
clQueue := C.clCreateCommandQueue(ctx.clContext, device.id, C.cl_command_queue_properties(properties), &err)
|
||||
if err != C.CL_SUCCESS {
|
||||
return nil, toError(err)
|
||||
}
|
||||
if clQueue == nil {
|
||||
return nil, ErrUnknown
|
||||
}
|
||||
commandQueue := &CommandQueue{clQueue: clQueue, device: device}
|
||||
runtime.SetFinalizer(commandQueue, releaseCommandQueue)
|
||||
return commandQueue, nil
|
||||
}
|
||||
|
||||
func (ctx *Context) CreateProgramWithSource(sources []string) (*Program, error) {
|
||||
cSources := make([]*C.char, len(sources))
|
||||
for i, s := range sources {
|
||||
cs := C.CString(s)
|
||||
cSources[i] = cs
|
||||
defer C.free(unsafe.Pointer(cs))
|
||||
}
|
||||
var err C.cl_int
|
||||
clProgram := C.clCreateProgramWithSource(ctx.clContext, C.cl_uint(len(sources)), &cSources[0], nil, &err)
|
||||
if err != C.CL_SUCCESS {
|
||||
return nil, toError(err)
|
||||
}
|
||||
if clProgram == nil {
|
||||
return nil, ErrUnknown
|
||||
}
|
||||
program := &Program{clProgram: clProgram, devices: ctx.devices}
|
||||
runtime.SetFinalizer(program, releaseProgram)
|
||||
return program, nil
|
||||
}
|
||||
|
||||
func (ctx *Context) CreateBufferUnsafe(flags MemFlag, size int, dataPtr unsafe.Pointer) (*MemObject, error) {
|
||||
var err C.cl_int
|
||||
clBuffer := C.clCreateBuffer(ctx.clContext, C.cl_mem_flags(flags), C.size_t(size), dataPtr, &err)
|
||||
if err != C.CL_SUCCESS {
|
||||
return nil, toError(err)
|
||||
}
|
||||
if clBuffer == nil {
|
||||
return nil, ErrUnknown
|
||||
}
|
||||
return newMemObject(clBuffer, size), nil
|
||||
}
|
||||
|
||||
func (ctx *Context) CreateEmptyBuffer(flags MemFlag, size int) (*MemObject, error) {
|
||||
return ctx.CreateBufferUnsafe(flags, size, nil)
|
||||
}
|
||||
|
||||
func (ctx *Context) CreateEmptyBufferFloat32(flags MemFlag, size int) (*MemObject, error) {
|
||||
return ctx.CreateBufferUnsafe(flags, 4*size, nil)
|
||||
}
|
||||
|
||||
func (ctx *Context) CreateBuffer(flags MemFlag, data []byte) (*MemObject, error) {
|
||||
return ctx.CreateBufferUnsafe(flags, len(data), unsafe.Pointer(&data[0]))
|
||||
}
|
||||
|
||||
//float64
|
||||
func (ctx *Context) CreateBufferFloat32(flags MemFlag, data []float32) (*MemObject, error) {
|
||||
return ctx.CreateBufferUnsafe(flags, 4*len(data), unsafe.Pointer(&data[0]))
|
||||
}
|
||||
|
||||
func (ctx *Context) CreateUserEvent() (*Event, error) {
|
||||
var err C.cl_int
|
||||
clEvent := C.clCreateUserEvent(ctx.clContext, &err)
|
||||
if err != C.CL_SUCCESS {
|
||||
return nil, toError(err)
|
||||
}
|
||||
return newEvent(clEvent), nil
|
||||
}
|
||||
|
||||
func (ctx *Context) Release() {
|
||||
releaseContext(ctx)
|
||||
}
|
||||
|
||||
// http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateSubBuffer.html
|
||||
// func (memObject *MemObject) CreateSubBuffer(flags MemFlag, bufferCreateType BufferCreateType, )
|
510
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device.go
generated
vendored
Normal file
510
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device.go
generated
vendored
Normal file
|
@ -0,0 +1,510 @@
|
|||
package cl
|
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #include "cl_ext.h"
|
||||
// #endif
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// maxDeviceCount is the capacity of the fixed-size id array used by
// GetDevices, and therefore the most devices a single call can return.
const maxDeviceCount = 64
|
||||
|
||||
type DeviceType uint
|
||||
|
||||
const (
|
||||
DeviceTypeCPU DeviceType = C.CL_DEVICE_TYPE_CPU
|
||||
DeviceTypeGPU DeviceType = C.CL_DEVICE_TYPE_GPU
|
||||
DeviceTypeAccelerator DeviceType = C.CL_DEVICE_TYPE_ACCELERATOR
|
||||
DeviceTypeDefault DeviceType = C.CL_DEVICE_TYPE_DEFAULT
|
||||
DeviceTypeAll DeviceType = C.CL_DEVICE_TYPE_ALL
|
||||
)
|
||||
|
||||
type FPConfig int
|
||||
|
||||
const (
|
||||
FPConfigDenorm FPConfig = C.CL_FP_DENORM // denorms are supported
|
||||
FPConfigInfNaN FPConfig = C.CL_FP_INF_NAN // INF and NaNs are supported
|
||||
FPConfigRoundToNearest FPConfig = C.CL_FP_ROUND_TO_NEAREST // round to nearest even rounding mode supported
|
||||
FPConfigRoundToZero FPConfig = C.CL_FP_ROUND_TO_ZERO // round to zero rounding mode supported
|
||||
FPConfigRoundToInf FPConfig = C.CL_FP_ROUND_TO_INF // round to positive and negative infinity rounding modes supported
|
||||
FPConfigFMA FPConfig = C.CL_FP_FMA // IEEE754-2008 fused multiply-add is supported
|
||||
FPConfigSoftFloat FPConfig = C.CL_FP_SOFT_FLOAT // Basic floating-point operations (such as addition, subtraction, multiplication) are implemented in software
|
||||
)
|
||||
|
||||
var fpConfigNameMap = map[FPConfig]string{
|
||||
FPConfigDenorm: "Denorm",
|
||||
FPConfigInfNaN: "InfNaN",
|
||||
FPConfigRoundToNearest: "RoundToNearest",
|
||||
FPConfigRoundToZero: "RoundToZero",
|
||||
FPConfigRoundToInf: "RoundToInf",
|
||||
FPConfigFMA: "FMA",
|
||||
FPConfigSoftFloat: "SoftFloat",
|
||||
}
|
||||
|
||||
func (c FPConfig) String() string {
|
||||
var parts []string
|
||||
for bit, name := range fpConfigNameMap {
|
||||
if c&bit != 0 {
|
||||
parts = append(parts, name)
|
||||
}
|
||||
}
|
||||
if parts == nil {
|
||||
return ""
|
||||
}
|
||||
return strings.Join(parts, "|")
|
||||
}
|
||||
|
||||
func (dt DeviceType) String() string {
|
||||
var parts []string
|
||||
if dt&DeviceTypeCPU != 0 {
|
||||
parts = append(parts, "CPU")
|
||||
}
|
||||
if dt&DeviceTypeGPU != 0 {
|
||||
parts = append(parts, "GPU")
|
||||
}
|
||||
if dt&DeviceTypeAccelerator != 0 {
|
||||
parts = append(parts, "Accelerator")
|
||||
}
|
||||
if dt&DeviceTypeDefault != 0 {
|
||||
parts = append(parts, "Default")
|
||||
}
|
||||
if parts == nil {
|
||||
parts = append(parts, "None")
|
||||
}
|
||||
return strings.Join(parts, "|")
|
||||
}
|
||||
|
||||
type Device struct {
|
||||
id C.cl_device_id
|
||||
}
|
||||
|
||||
func buildDeviceIdList(devices []*Device) []C.cl_device_id {
|
||||
deviceIds := make([]C.cl_device_id, len(devices))
|
||||
for i, d := range devices {
|
||||
deviceIds[i] = d.id
|
||||
}
|
||||
return deviceIds
|
||||
}
|
||||
|
||||
// Obtain the list of devices available on a platform. 'platform' refers
|
||||
// to the platform returned by GetPlatforms or can be nil. If platform
|
||||
// is nil, the behavior is implementation-defined.
|
||||
func GetDevices(platform *Platform, deviceType DeviceType) ([]*Device, error) {
|
||||
var deviceIds [maxDeviceCount]C.cl_device_id
|
||||
var numDevices C.cl_uint
|
||||
var platformId C.cl_platform_id
|
||||
if platform != nil {
|
||||
platformId = platform.id
|
||||
}
|
||||
if err := C.clGetDeviceIDs(platformId, C.cl_device_type(deviceType), C.cl_uint(maxDeviceCount), &deviceIds[0], &numDevices); err != C.CL_SUCCESS {
|
||||
return nil, toError(err)
|
||||
}
|
||||
if numDevices > maxDeviceCount {
|
||||
numDevices = maxDeviceCount
|
||||
}
|
||||
devices := make([]*Device, numDevices)
|
||||
for i := 0; i < int(numDevices); i++ {
|
||||
devices[i] = &Device{id: deviceIds[i]}
|
||||
}
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
func (d *Device) nullableId() C.cl_device_id {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
return d.id
|
||||
}
|
||||
|
||||
func (d *Device) GetInfoString(param C.cl_device_info, panicOnError bool) (string, error) {
|
||||
var strC [1024]C.char
|
||||
var strN C.size_t
|
||||
if err := C.clGetDeviceInfo(d.id, param, 1024, unsafe.Pointer(&strC), &strN); err != C.CL_SUCCESS {
|
||||
if panicOnError {
|
||||
panic("Should never fail")
|
||||
}
|
||||
return "", toError(err)
|
||||
}
|
||||
|
||||
// OpenCL strings are NUL-terminated, and the terminator is included in strN
|
||||
// Go strings aren't NUL-terminated, so subtract 1 from the length
|
||||
return C.GoStringN((*C.char)(unsafe.Pointer(&strC)), C.int(strN-1)), nil
|
||||
}
|
||||
|
||||
func (d *Device) getInfoUint(param C.cl_device_info, panicOnError bool) (uint, error) {
|
||||
var val C.cl_uint
|
||||
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
|
||||
if panicOnError {
|
||||
panic("Should never fail")
|
||||
}
|
||||
return 0, toError(err)
|
||||
}
|
||||
return uint(val), nil
|
||||
}
|
||||
|
||||
func (d *Device) getInfoSize(param C.cl_device_info, panicOnError bool) (int, error) {
|
||||
var val C.size_t
|
||||
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
|
||||
if panicOnError {
|
||||
panic("Should never fail")
|
||||
}
|
||||
return 0, toError(err)
|
||||
}
|
||||
return int(val), nil
|
||||
}
|
||||
|
||||
func (d *Device) getInfoUlong(param C.cl_device_info, panicOnError bool) (int64, error) {
|
||||
var val C.cl_ulong
|
||||
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
|
||||
if panicOnError {
|
||||
panic("Should never fail")
|
||||
}
|
||||
return 0, toError(err)
|
||||
}
|
||||
return int64(val), nil
|
||||
}
|
||||
|
||||
func (d *Device) getInfoBool(param C.cl_device_info, panicOnError bool) (bool, error) {
|
||||
var val C.cl_bool
|
||||
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
|
||||
if panicOnError {
|
||||
panic("Should never fail")
|
||||
}
|
||||
return false, toError(err)
|
||||
}
|
||||
return val == C.CL_TRUE, nil
|
||||
}
|
||||
|
||||
func (d *Device) Name() string {
|
||||
str, _ := d.GetInfoString(C.CL_DEVICE_NAME, true)
|
||||
return str
|
||||
}
|
||||
|
||||
func (d *Device) Vendor() string {
|
||||
str, _ := d.GetInfoString(C.CL_DEVICE_VENDOR, true)
|
||||
return str
|
||||
}
|
||||
|
||||
func (d *Device) Extensions() string {
|
||||
str, _ := d.GetInfoString(C.CL_DEVICE_EXTENSIONS, true)
|
||||
return str
|
||||
}
|
||||
|
||||
func (d *Device) OpenCLCVersion() string {
|
||||
str, _ := d.GetInfoString(C.CL_DEVICE_OPENCL_C_VERSION, true)
|
||||
return str
|
||||
}
|
||||
|
||||
func (d *Device) Profile() string {
|
||||
str, _ := d.GetInfoString(C.CL_DEVICE_PROFILE, true)
|
||||
return str
|
||||
}
|
||||
|
||||
func (d *Device) Version() string {
|
||||
str, _ := d.GetInfoString(C.CL_DEVICE_VERSION, true)
|
||||
return str
|
||||
}
|
||||
|
||||
func (d *Device) DriverVersion() string {
|
||||
str, _ := d.GetInfoString(C.CL_DRIVER_VERSION, true)
|
||||
return str
|
||||
}
|
||||
|
||||
// The default compute device address space size specified as an
|
||||
// unsigned integer value in bits. Currently supported values are 32 or 64 bits.
|
||||
func (d *Device) AddressBits() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_ADDRESS_BITS, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Size of global memory cache line in bytes.
|
||||
func (d *Device) GlobalMemCachelineSize() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Maximum configured clock frequency of the device in MHz.
|
||||
func (d *Device) MaxClockFrequency() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CLOCK_FREQUENCY, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// The number of parallel compute units on the OpenCL device.
|
||||
// A work-group executes on a single compute unit. The minimum value is 1.
|
||||
func (d *Device) MaxComputeUnits() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_COMPUTE_UNITS, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Max number of arguments declared with the __constant qualifier in a kernel.
|
||||
// The minimum value is 8 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
func (d *Device) MaxConstantArgs() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CONSTANT_ARGS, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Max number of simultaneous image objects that can be read by a kernel.
|
||||
// The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) MaxReadImageArgs() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_READ_IMAGE_ARGS, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Maximum number of samplers that can be used in a kernel. The minimum
|
||||
// value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. (Also see sampler_t.)
|
||||
func (d *Device) MaxSamplers() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_SAMPLERS, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Maximum dimensions that specify the global and local work-item IDs used
|
||||
// by the data parallel execution model. (Refer to clEnqueueNDRangeKernel).
|
||||
// The minimum value is 3 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
func (d *Device) MaxWorkItemDimensions() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Max number of simultaneous image objects that can be written to by a
|
||||
// kernel. The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) MaxWriteImageArgs() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WRITE_IMAGE_ARGS, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// The minimum value is the size (in bits) of the largest OpenCL built-in
|
||||
// data type supported by the device (long16 in FULL profile, long16 or
|
||||
// int16 in EMBEDDED profile) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
func (d *Device) MemBaseAddrAlign() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MEM_BASE_ADDR_ALIGN, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
func (d *Device) NativeVectorWidthChar() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
func (d *Device) NativeVectorWidthShort() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
func (d *Device) NativeVectorWidthInt() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
func (d *Device) NativeVectorWidthLong() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
func (d *Device) NativeVectorWidthFloat() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
func (d *Device) NativeVectorWidthDouble() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
func (d *Device) NativeVectorWidthHalf() int {
|
||||
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Max height of 2D image in pixels. The minimum value is 8192
|
||||
// if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) Image2DMaxHeight() int {
|
||||
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_HEIGHT, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Max width of 2D image or 1D image not created from a buffer object in
|
||||
// pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) Image2DMaxWidth() int {
|
||||
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_WIDTH, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Max depth of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) Image3DMaxDepth() int {
|
||||
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_DEPTH, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Max height of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) Image3DMaxHeight() int {
|
||||
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_HEIGHT, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Max width of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) Image3DMaxWidth() int {
|
||||
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_WIDTH, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Max size in bytes of the arguments that can be passed to a kernel. The
|
||||
// minimum value is 1024 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
// For this minimum value, only a maximum of 128 arguments can be passed to a kernel.
|
||||
func (d *Device) MaxParameterSize() int {
|
||||
val, _ := d.getInfoSize(C.CL_DEVICE_MAX_PARAMETER_SIZE, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Maximum number of work-items in a work-group executing a kernel on a
|
||||
// single compute unit, using the data parallel execution model. (Refer
|
||||
// to clEnqueueNDRangeKernel). The minimum value is 1.
|
||||
func (d *Device) MaxWorkGroupSize() int {
|
||||
val, _ := d.getInfoSize(C.CL_DEVICE_MAX_WORK_GROUP_SIZE, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Describes the resolution of device timer. This is measured in nanoseconds.
|
||||
func (d *Device) ProfilingTimerResolution() int {
|
||||
val, _ := d.getInfoSize(C.CL_DEVICE_PROFILING_TIMER_RESOLUTION, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Size of local memory arena in bytes. The minimum value is 32 KB for
|
||||
// devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
func (d *Device) LocalMemSize() int64 {
|
||||
val, _ := d.getInfoUlong(C.CL_DEVICE_LOCAL_MEM_SIZE, true)
|
||||
return val
|
||||
}
|
||||
|
||||
// Max size in bytes of a constant buffer allocation. The minimum value is
|
||||
// 64 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
func (d *Device) MaxConstantBufferSize() int64 {
|
||||
val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, true)
|
||||
return val
|
||||
}
|
||||
|
||||
// Max size of memory object allocation in bytes. The minimum value is max
|
||||
// (1/4th of CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024) for devices that are
|
||||
// not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
func (d *Device) MaxMemAllocSize() int64 {
|
||||
val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_MEM_ALLOC_SIZE, true)
|
||||
return val
|
||||
}
|
||||
|
||||
// Size of global device memory in bytes.
|
||||
func (d *Device) GlobalMemSize() int64 {
|
||||
val, _ := d.getInfoUlong(C.CL_DEVICE_GLOBAL_MEM_SIZE, true)
|
||||
return val
|
||||
}
|
||||
|
||||
func (d *Device) Available() bool {
|
||||
val, _ := d.getInfoBool(C.CL_DEVICE_AVAILABLE, true)
|
||||
return val
|
||||
}
|
||||
|
||||
func (d *Device) CompilerAvailable() bool {
|
||||
val, _ := d.getInfoBool(C.CL_DEVICE_COMPILER_AVAILABLE, true)
|
||||
return val
|
||||
}
|
||||
|
||||
func (d *Device) EndianLittle() bool {
|
||||
val, _ := d.getInfoBool(C.CL_DEVICE_ENDIAN_LITTLE, true)
|
||||
return val
|
||||
}
|
||||
|
||||
// Is CL_TRUE if the device implements error correction for all
|
||||
// accesses to compute device memory (global and constant). Is
|
||||
// CL_FALSE if the device does not implement such error correction.
|
||||
func (d *Device) ErrorCorrectionSupport() bool {
|
||||
val, _ := d.getInfoBool(C.CL_DEVICE_ERROR_CORRECTION_SUPPORT, true)
|
||||
return val
|
||||
}
|
||||
|
||||
func (d *Device) HostUnifiedMemory() bool {
|
||||
val, _ := d.getInfoBool(C.CL_DEVICE_HOST_UNIFIED_MEMORY, true)
|
||||
return val
|
||||
}
|
||||
|
||||
func (d *Device) ImageSupport() bool {
|
||||
val, _ := d.getInfoBool(C.CL_DEVICE_IMAGE_SUPPORT, true)
|
||||
return val
|
||||
}
|
||||
|
||||
func (d *Device) Type() DeviceType {
|
||||
var deviceType C.cl_device_type
|
||||
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_TYPE, C.size_t(unsafe.Sizeof(deviceType)), unsafe.Pointer(&deviceType), nil); err != C.CL_SUCCESS {
|
||||
panic("Failed to get device type")
|
||||
}
|
||||
return DeviceType(deviceType)
|
||||
}
|
||||
|
||||
// Describes double precision floating-point capability of the OpenCL device
|
||||
func (d *Device) DoubleFPConfig() FPConfig {
|
||||
var fpConfig C.cl_device_fp_config
|
||||
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_DOUBLE_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil); err != C.CL_SUCCESS {
|
||||
panic("Failed to get double FP config")
|
||||
}
|
||||
return FPConfig(fpConfig)
|
||||
}
|
||||
|
||||
// Describes the OPTIONAL half precision floating-point capability of the OpenCL device
|
||||
func (d *Device) HalfFPConfig() FPConfig {
|
||||
var fpConfig C.cl_device_fp_config
|
||||
err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_HALF_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil)
|
||||
if err != C.CL_SUCCESS {
|
||||
return FPConfig(0)
|
||||
}
|
||||
return FPConfig(fpConfig)
|
||||
}
|
||||
|
||||
// Type of local memory supported. This can be set to CL_LOCAL implying dedicated
|
||||
// local memory storage such as SRAM, or CL_GLOBAL. For custom devices, CL_NONE
|
||||
// can also be returned indicating no local memory support.
|
||||
func (d *Device) LocalMemType() LocalMemType {
|
||||
var memType C.cl_device_local_mem_type
|
||||
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_LOCAL_MEM_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS {
|
||||
return LocalMemType(C.CL_NONE)
|
||||
}
|
||||
return LocalMemType(memType)
|
||||
}
|
||||
|
||||
// Describes the execution capabilities of the device. The mandated minimum capability is CL_EXEC_KERNEL.
|
||||
func (d *Device) ExecutionCapabilities() ExecCapability {
|
||||
var execCap C.cl_device_exec_capabilities
|
||||
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_EXECUTION_CAPABILITIES, C.size_t(unsafe.Sizeof(execCap)), unsafe.Pointer(&execCap), nil); err != C.CL_SUCCESS {
|
||||
panic("Failed to get execution capabilities")
|
||||
}
|
||||
return ExecCapability(execCap)
|
||||
}
|
||||
|
||||
func (d *Device) GlobalMemCacheType() MemCacheType {
|
||||
var memType C.cl_device_mem_cache_type
|
||||
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS {
|
||||
return MemCacheType(C.CL_NONE)
|
||||
}
|
||||
return MemCacheType(memType)
|
||||
}
|
||||
|
||||
// Maximum number of work-items that can be specified in each dimension of the work-group to clEnqueueNDRangeKernel.
|
||||
//
|
||||
// Returns n size_t entries, where n is the value returned by the query for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.
|
||||
//
|
||||
// The minimum value is (1, 1, 1) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
func (d *Device) MaxWorkItemSizes() []int {
|
||||
dims := d.MaxWorkItemDimensions()
|
||||
sizes := make([]C.size_t, dims)
|
||||
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_MAX_WORK_ITEM_SIZES, C.size_t(int(unsafe.Sizeof(sizes[0]))*dims), unsafe.Pointer(&sizes[0]), nil); err != C.CL_SUCCESS {
|
||||
panic("Failed to get max work item sizes")
|
||||
}
|
||||
intSizes := make([]int, dims)
|
||||
for i, s := range sizes {
|
||||
intSizes[i] = int(s)
|
||||
}
|
||||
return intSizes
|
||||
}
|
51
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device12.go
generated
vendored
Normal file
51
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device12.go
generated
vendored
Normal file
|
@ -0,0 +1,51 @@
|
|||
// +build cl12
|
||||
|
||||
package cl
|
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C"
|
||||
import "unsafe"
|
||||
|
||||
// FPConfigCorrectlyRoundedDivideSqrt is the FPConfig value of the OpenCL
// constant CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT.
const FPConfigCorrectlyRoundedDivideSqrt FPConfig = C.CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT
|
||||
|
||||
func init() {
|
||||
fpConfigNameMap[FPConfigCorrectlyRoundedDivideSqrt] = "CorrectlyRoundedDivideSqrt"
|
||||
}
|
||||
|
||||
func (d *Device) BuiltInKernels() string {
|
||||
str, _ := d.getInfoString(C.CL_DEVICE_BUILT_IN_KERNELS, true)
|
||||
return str
|
||||
}
|
||||
|
||||
// Is CL_FALSE if the implementation does not have a linker available. Is CL_TRUE if the linker is available. This can be CL_FALSE for the embedded platform profile only. This must be CL_TRUE if CL_DEVICE_COMPILER_AVAILABLE is CL_TRUE
|
||||
func (d *Device) LinkerAvailable() bool {
|
||||
val, _ := d.getInfoBool(C.CL_DEVICE_LINKER_AVAILABLE, true)
|
||||
return val
|
||||
}
|
||||
|
||||
func (d *Device) ParentDevice() *Device {
|
||||
var deviceId C.cl_device_id
|
||||
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_PARENT_DEVICE, C.size_t(unsafe.Sizeof(deviceId)), unsafe.Pointer(&deviceId), nil); err != C.CL_SUCCESS {
|
||||
panic("ParentDevice failed")
|
||||
}
|
||||
if deviceId == nil {
|
||||
return nil
|
||||
}
|
||||
return &Device{id: deviceId}
|
||||
}
|
||||
|
||||
// Max number of pixels for a 1D image created from a buffer object. The minimum value is 65536 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) ImageMaxBufferSize() int {
|
||||
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, true)
|
||||
return int(val)
|
||||
}
|
||||
|
||||
// Max number of images in a 1D or 2D image array. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
|
||||
func (d *Device) ImageMaxArraySize() int {
|
||||
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, true)
|
||||
return int(val)
|
||||
}
|
1210
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl.h
generated
vendored
Normal file
1210
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl.h
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
315
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_ext.h
generated
vendored
Normal file
315
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_ext.h
generated
vendored
Normal file
|
@ -0,0 +1,315 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2013 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
|
||||
|
||||
/* cl_ext.h contains OpenCL extensions which don't have external */
|
||||
/* (OpenGL, D3D) dependencies. */
|
||||
|
||||
#ifndef __CL_EXT_H
|
||||
#define __CL_EXT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <AvailabilityMacros.h>
|
||||
#endif
|
||||
|
||||
#include <cl.h>
|
||||
|
||||
/* cl_khr_fp16 extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
|
||||
|
||||
/* Memory object destruction
|
||||
*
|
||||
* Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
|
||||
*
|
||||
* Registers a user callback function that will be called when the memory object is deleted and its resources
|
||||
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
|
||||
* stack associated with memobj. The registered user callback functions are called in the reverse order in
|
||||
* which they were registered. The user callback functions are called and then the memory object is deleted
|
||||
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
|
||||
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
|
||||
* the storage bits for the memory object, can be reused or freed.
|
||||
*
|
||||
* The application may not call CL api's with the cl_mem object passed to the pfn_notify.
|
||||
*
|
||||
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
||||
* before using.
|
||||
*/
|
||||
#define cl_APPLE_SetMemObjectDestructor 1
|
||||
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
|
||||
void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
|
||||
void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Context Logging Functions
|
||||
*
|
||||
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
|
||||
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
||||
* before using.
|
||||
*
|
||||
* clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger
|
||||
*/
|
||||
#define cl_APPLE_ContextLoggingFunctions 1
|
||||
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
|
||||
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
|
||||
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/************************
|
||||
* cl_khr_icd extension *
|
||||
************************/
|
||||
#define cl_khr_icd 1
|
||||
|
||||
/* cl_platform_info */
|
||||
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_PLATFORM_NOT_FOUND_KHR -1001
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clIcdGetPlatformIDsKHR(cl_uint /* num_entries */,
|
||||
cl_platform_id * /* platforms */,
|
||||
cl_uint * /* num_platforms */);
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
|
||||
cl_uint /* num_entries */,
|
||||
cl_platform_id * /* platforms */,
|
||||
cl_uint * /* num_platforms */);
|
||||
|
||||
|
||||
/* Extension: cl_khr_image2D_buffer
|
||||
*
|
||||
* This extension allows a 2D image to be created from a cl_mem buffer without a copy.
|
||||
* The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
|
||||
* Both the sampler and sampler-less read_image built-in functions are supported for 2D images
|
||||
* and 2D images created from a buffer. Similarly, the write_image built-ins are also supported
|
||||
* for 2D images created from a buffer.
|
||||
*
|
||||
* When the 2D image from buffer is created, the client must specify the width,
|
||||
* height, image format (i.e. channel order and channel data type) and optionally the row pitch
|
||||
*
|
||||
* The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
|
||||
* The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
|
||||
*/
|
||||
|
||||
/**************************************
|
||||
* cl_khr_initialize_memory extension *
|
||||
**************************************/
|
||||
|
||||
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x200E
|
||||
|
||||
|
||||
/**************************************
|
||||
* cl_khr_terminate_context extension *
|
||||
**************************************/
|
||||
|
||||
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F
|
||||
#define CL_CONTEXT_TERMINATE_KHR 0x2010
|
||||
|
||||
#define cl_khr_terminate_context 1
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
/*
|
||||
* Extension: cl_khr_spir
|
||||
*
|
||||
* This extension adds support to create an OpenCL program object from a
|
||||
* Standard Portable Intermediate Representation (SPIR) instance
|
||||
*/
|
||||
|
||||
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
|
||||
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
|
||||
|
||||
|
||||
/******************************************
|
||||
* cl_nv_device_attribute_query extension *
|
||||
******************************************/
|
||||
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
|
||||
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
|
||||
#define CL_DEVICE_WARP_SIZE_NV 0x4003
|
||||
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
|
||||
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
|
||||
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
|
||||
|
||||
/*********************************
|
||||
* cl_amd_device_attribute_query *
|
||||
*********************************/
|
||||
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
|
||||
|
||||
/*********************************
|
||||
* cl_arm_printf extension
|
||||
*********************************/
|
||||
#define CL_PRINTF_CALLBACK_ARM 0x40B0
|
||||
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
|
||||
|
||||
#ifdef CL_VERSION_1_1
|
||||
/***********************************
|
||||
* cl_ext_device_fission extension *
|
||||
***********************************/
|
||||
#define cl_ext_device_fission 1
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_ulong cl_device_partition_property_ext;
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clCreateSubDevicesEXT( cl_device_id /*in_device*/,
|
||||
const cl_device_partition_property_ext * /* properties */,
|
||||
cl_uint /*num_entries*/,
|
||||
cl_device_id * /*out_devices*/,
|
||||
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/,
|
||||
const cl_device_partition_property_ext * /* properties */,
|
||||
cl_uint /*num_entries*/,
|
||||
cl_device_id * /*out_devices*/,
|
||||
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
/* cl_device_partition_property_ext */
|
||||
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
|
||||
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
|
||||
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
|
||||
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
|
||||
|
||||
/* clDeviceGetInfo selectors */
|
||||
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
|
||||
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
|
||||
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
|
||||
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
|
||||
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
|
||||
|
||||
/* error codes */
|
||||
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
|
||||
#define CL_INVALID_PARTITION_COUNT_EXT -1058
|
||||
#define CL_INVALID_PARTITION_NAME_EXT -1059
|
||||
|
||||
/* CL_AFFINITY_DOMAINs */
|
||||
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
|
||||
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
|
||||
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
|
||||
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
|
||||
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
|
||||
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
|
||||
|
||||
/* cl_device_partition_property_ext list terminators */
|
||||
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_ext_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
|
||||
|
||||
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
|
||||
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
|
||||
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
|
||||
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
|
||||
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
|
||||
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
|
||||
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
|
||||
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
|
||||
|
||||
typedef cl_uint cl_image_pitch_info_qcom;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceImageInfoQCOM(cl_device_id device,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
const cl_image_format *image_format,
|
||||
cl_image_pitch_info_qcom param_name,
|
||||
size_t param_value_size,
|
||||
void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
/* Descriptor for an externally allocated host memory region, declared in the */
/* cl_qcom_ext_host_ptr section of this header. It records the kind of        */
/* external allocation and the host cache policy that applies to it.          */
typedef struct _cl_mem_ext_host_ptr
|
||||
{
|
||||
/* Type of external memory allocation. */
|
||||
/* Legal values will be defined in layered extensions. */
|
||||
cl_uint allocation_type;
|
||||
|
||||
/* Host cache policy for this external memory allocation. */
|
||||
cl_uint host_cache_policy;
|
||||
|
||||
} cl_mem_ext_host_ptr;
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_ion_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
|
||||
|
||||
/* Descriptor for an ION-allocated host memory region, declared in the        */
/* cl_qcom_ion_host_ptr section of this header. It embeds the generic         */
/* cl_mem_ext_host_ptr descriptor as its first member and adds the ION file   */
/* descriptor and the host pointer to the allocation.                         */
typedef struct _cl_mem_ion_host_ptr
|
||||
{
|
||||
/* Type of external memory allocation. */
|
||||
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
|
||||
cl_mem_ext_host_ptr ext_host_ptr;
|
||||
|
||||
/* ION file descriptor */
|
||||
int ion_filedesc;
|
||||
|
||||
/* Host pointer to the ION allocated memory */
|
||||
void* ion_hostptr;
|
||||
|
||||
} cl_mem_ion_host_ptr;
|
||||
|
||||
#endif /* CL_VERSION_1_1 */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __CL_EXT_H */
|
158
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl.h
generated
vendored
Normal file
158
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl.h
generated
vendored
Normal file
|
@ -0,0 +1,158 @@
|
|||
/**********************************************************************************
|
||||
* Copyright (c) 2008 - 2012 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_GL_H
|
||||
#define __OPENCL_CL_GL_H
|
||||
|
||||
#include <cl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef cl_uint cl_gl_object_type;
|
||||
typedef cl_uint cl_gl_texture_info;
|
||||
typedef cl_uint cl_gl_platform_info;
|
||||
typedef struct __GLsync *cl_GLsync;
|
||||
|
||||
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
|
||||
#define CL_GL_OBJECT_BUFFER 0x2000
|
||||
#define CL_GL_OBJECT_TEXTURE2D 0x2001
|
||||
#define CL_GL_OBJECT_TEXTURE3D 0x2002
|
||||
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
|
||||
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
|
||||
#define CL_GL_OBJECT_TEXTURE1D 0x200F
|
||||
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
|
||||
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
|
||||
|
||||
/* cl_gl_texture_info */
|
||||
#define CL_GL_TEXTURE_TARGET 0x2004
|
||||
#define CL_GL_MIPMAP_LEVEL 0x2005
|
||||
#define CL_GL_NUM_SAMPLES 0x2012
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLBuffer(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLuint /* bufobj */,
|
||||
int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLRenderbuffer(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLuint /* renderbuffer */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLObjectInfo(cl_mem /* memobj */,
|
||||
cl_gl_object_type * /* gl_object_type */,
|
||||
cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLTextureInfo(cl_mem /* memobj */,
|
||||
cl_gl_texture_info /* param_name */,
|
||||
size_t /* param_value_size */,
|
||||
void * /* param_value */,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Deprecated OpenCL 1.1 APIs */
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture2D(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture3D(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
/* cl_khr_gl_sharing extension */
|
||||
|
||||
#define cl_khr_gl_sharing 1
|
||||
|
||||
typedef cl_uint cl_gl_context_info;
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
|
||||
|
||||
/* cl_gl_context_info */
|
||||
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
|
||||
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
|
||||
|
||||
/* Additional cl_context_properties */
|
||||
#define CL_GL_CONTEXT_KHR 0x2008
|
||||
#define CL_EGL_DISPLAY_KHR 0x2009
|
||||
#define CL_GLX_DISPLAY_KHR 0x200A
|
||||
#define CL_WGL_HDC_KHR 0x200B
|
||||
#define CL_CGL_SHAREGROUP_KHR 0x200C
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
|
||||
cl_gl_context_info /* param_name */,
|
||||
size_t /* param_value_size */,
|
||||
void * /* param_value */,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
||||
const cl_context_properties * properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_H */
|
65
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl_ext.h
generated
vendored
Normal file
65
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl_ext.h
generated
vendored
Normal file
|
@ -0,0 +1,65 @@
|
|||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2012 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */
|
||||
/* OpenGL dependencies. */
|
||||
|
||||
#ifndef __OPENCL_CL_GL_EXT_H
|
||||
#define __OPENCL_CL_GL_EXT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <cl_gl.h>
|
||||
|
||||
/*
|
||||
* For each extension, follow this template
|
||||
* cl_VEN_extname extension */
|
||||
/* #define cl_VEN_extname 1
|
||||
* ... define new types, if any
|
||||
* ... define new tokens, if any
|
||||
* ... define new APIs, if any
|
||||
*
|
||||
* If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
|
||||
* This allows us to avoid having to decide whether to include GL headers or GLES here.
|
||||
*/
|
||||
|
||||
/*
|
||||
* cl_khr_gl_event extension
|
||||
* See section 9.9 in the OpenCL 1.1 spec for more information
|
||||
*/
|
||||
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromGLsyncKHR(cl_context /* context */,
|
||||
cl_GLsync /* cl_GLsync */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_EXT_H */
|
1278
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_platform.h
generated
vendored
Normal file
1278
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_platform.h
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
43
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/opencl.h
generated
vendored
Normal file
43
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/opencl.h
generated
vendored
Normal file
|
@ -0,0 +1,43 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2012 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_H
|
||||
#define __OPENCL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <cl.h>
|
||||
#include <cl_gl.h>
|
||||
#include <cl_gl_ext.h>
|
||||
#include <cl_ext.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_H */
|
||||
|
83
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/image.go
generated
vendored
Normal file
83
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/image.go
generated
vendored
Normal file
|
@ -0,0 +1,83 @@
|
|||
// +build cl12
|
||||
|
||||
package cl
|
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C"
|
||||
import (
|
||||
"image"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func (ctx *Context) CreateImage(flags MemFlag, imageFormat ImageFormat, imageDesc ImageDescription, data []byte) (*MemObject, error) {
|
||||
format := imageFormat.toCl()
|
||||
desc := imageDesc.toCl()
|
||||
var dataPtr unsafe.Pointer
|
||||
if data != nil {
|
||||
dataPtr = unsafe.Pointer(&data[0])
|
||||
}
|
||||
var err C.cl_int
|
||||
clBuffer := C.clCreateImage(ctx.clContext, C.cl_mem_flags(flags), &format, &desc, dataPtr, &err)
|
||||
if err != C.CL_SUCCESS {
|
||||
return nil, toError(err)
|
||||
}
|
||||
if clBuffer == nil {
|
||||
return nil, ErrUnknown
|
||||
}
|
||||
return newMemObject(clBuffer, len(data)), nil
|
||||
}
|
||||
|
||||
func (ctx *Context) CreateImageSimple(flags MemFlag, width, height int, channelOrder ChannelOrder, channelDataType ChannelDataType, data []byte) (*MemObject, error) {
|
||||
format := ImageFormat{channelOrder, channelDataType}
|
||||
desc := ImageDescription{
|
||||
Type: MemObjectTypeImage2D,
|
||||
Width: width,
|
||||
Height: height,
|
||||
}
|
||||
return ctx.CreateImage(flags, format, desc, data)
|
||||
}
|
||||
|
||||
func (ctx *Context) CreateImageFromImage(flags MemFlag, img image.Image) (*MemObject, error) {
|
||||
switch m := img.(type) {
|
||||
case *image.Gray:
|
||||
format := ImageFormat{ChannelOrderIntensity, ChannelDataTypeUNormInt8}
|
||||
desc := ImageDescription{
|
||||
Type: MemObjectTypeImage2D,
|
||||
Width: m.Bounds().Dx(),
|
||||
Height: m.Bounds().Dy(),
|
||||
RowPitch: m.Stride,
|
||||
}
|
||||
return ctx.CreateImage(flags, format, desc, m.Pix)
|
||||
case *image.RGBA:
|
||||
format := ImageFormat{ChannelOrderRGBA, ChannelDataTypeUNormInt8}
|
||||
desc := ImageDescription{
|
||||
Type: MemObjectTypeImage2D,
|
||||
Width: m.Bounds().Dx(),
|
||||
Height: m.Bounds().Dy(),
|
||||
RowPitch: m.Stride,
|
||||
}
|
||||
return ctx.CreateImage(flags, format, desc, m.Pix)
|
||||
}
|
||||
|
||||
b := img.Bounds()
|
||||
w := b.Dx()
|
||||
h := b.Dy()
|
||||
data := make([]byte, w*h*4)
|
||||
dataOffset := 0
|
||||
for y := 0; y < h; y++ {
|
||||
for x := 0; x < w; x++ {
|
||||
c := img.At(x+b.Min.X, y+b.Min.Y)
|
||||
r, g, b, a := c.RGBA()
|
||||
data[dataOffset] = uint8(r >> 8)
|
||||
data[dataOffset+1] = uint8(g >> 8)
|
||||
data[dataOffset+2] = uint8(b >> 8)
|
||||
data[dataOffset+3] = uint8(a >> 8)
|
||||
dataOffset += 4
|
||||
}
|
||||
}
|
||||
return ctx.CreateImageSimple(flags, w, h, ChannelOrderRGBA, ChannelDataTypeUNormInt8, data)
|
||||
}
|
127
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel.go
generated
vendored
Normal file
127
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel.go
generated
vendored
Normal file
|
@ -0,0 +1,127 @@
|
|||
package cl
|
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// ErrUnsupportedArgumentType reports a kernel argument whose Go type is not
// handled; SetArg returns it from its default case.
type ErrUnsupportedArgumentType struct {
	Index int         // position of the rejected argument
	Value interface{} // the rejected argument value itself
}

// Error renders the argument index and the offending value.
func (e ErrUnsupportedArgumentType) Error() string {
	const format = "cl: unsupported argument type for index %d: %+v"
	return fmt.Sprintf(format, e.Index, e.Value)
}
|
||||
|
||||
type Kernel struct {
|
||||
clKernel C.cl_kernel
|
||||
name string
|
||||
}
|
||||
|
||||
type LocalBuffer int
|
||||
|
||||
func releaseKernel(k *Kernel) {
|
||||
if k.clKernel != nil {
|
||||
C.clReleaseKernel(k.clKernel)
|
||||
k.clKernel = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (k *Kernel) Release() {
|
||||
releaseKernel(k)
|
||||
}
|
||||
|
||||
func (k *Kernel) SetArgs(args ...interface{}) error {
|
||||
for index, arg := range args {
|
||||
if err := k.SetArg(index, arg); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (k *Kernel) SetArg(index int, arg interface{}) error {
|
||||
switch val := arg.(type) {
|
||||
case uint8:
|
||||
return k.SetArgUint8(index, val)
|
||||
case int8:
|
||||
return k.SetArgInt8(index, val)
|
||||
case uint32:
|
||||
return k.SetArgUint32(index, val)
|
||||
case uint64:
|
||||
return k.SetArgUint64(index, val)
|
||||
case int32:
|
||||
return k.SetArgInt32(index, val)
|
||||
case float32:
|
||||
return k.SetArgFloat32(index, val)
|
||||
case *MemObject:
|
||||
return k.SetArgBuffer(index, val)
|
||||
case LocalBuffer:
|
||||
return k.SetArgLocal(index, int(val))
|
||||
default:
|
||||
return ErrUnsupportedArgumentType{Index: index, Value: arg}
|
||||
}
|
||||
}
|
||||
|
||||
func (k *Kernel) SetArgBuffer(index int, buffer *MemObject) error {
|
||||
return k.SetArgUnsafe(index, int(unsafe.Sizeof(buffer.clMem)), unsafe.Pointer(&buffer.clMem))
|
||||
}
|
||||
|
||||
func (k *Kernel) SetArgFloat32(index int, val float32) error {
|
||||
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
|
||||
}
|
||||
|
||||
func (k *Kernel) SetArgInt8(index int, val int8) error {
|
||||
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
|
||||
}
|
||||
|
||||
func (k *Kernel) SetArgUint8(index int, val uint8) error {
|
||||
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
|
||||
}
|
||||
|
||||
func (k *Kernel) SetArgInt32(index int, val int32) error {
|
||||
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
|
||||
}
|
||||
|
||||
func (k *Kernel) SetArgUint32(index int, val uint32) error {
|
||||
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
|
||||
}
|
||||
|
||||
func (k *Kernel) SetArgUint64(index int, val uint64) error {
|
||||
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
|
||||
}
|
||||
|
||||
func (k *Kernel) SetArgLocal(index int, size int) error {
|
||||
return k.SetArgUnsafe(index, size, nil)
|
||||
}
|
||||
|
||||
func (k *Kernel) SetArgUnsafe(index, argSize int, arg unsafe.Pointer) error {
|
||||
//fmt.Println("FUNKY: ", index, argSize)
|
||||
return toError(C.clSetKernelArg(k.clKernel, C.cl_uint(index), C.size_t(argSize), arg))
|
||||
}
|
||||
|
||||
func (k *Kernel) PreferredWorkGroupSizeMultiple(device *Device) (int, error) {
|
||||
var size C.size_t
|
||||
err := C.clGetKernelWorkGroupInfo(k.clKernel, device.nullableId(), C.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, C.size_t(unsafe.Sizeof(size)), unsafe.Pointer(&size), nil)
|
||||
return int(size), toError(err)
|
||||
}
|
||||
|
||||
func (k *Kernel) WorkGroupSize(device *Device) (int, error) {
|
||||
var size C.size_t
|
||||
err := C.clGetKernelWorkGroupInfo(k.clKernel, device.nullableId(), C.CL_KERNEL_WORK_GROUP_SIZE, C.size_t(unsafe.Sizeof(size)), unsafe.Pointer(&size), nil)
|
||||
return int(size), toError(err)
|
||||
}
|
||||
|
||||
func (k *Kernel) NumArgs() (int, error) {
|
||||
var num C.cl_uint
|
||||
err := C.clGetKernelInfo(k.clKernel, C.CL_KERNEL_NUM_ARGS, C.size_t(unsafe.Sizeof(num)), unsafe.Pointer(&num), nil)
|
||||
return int(num), toError(err)
|
||||
}
|
7
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel10.go
generated
vendored
Normal file
7
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel10.go
generated
vendored
Normal file
|
@ -0,0 +1,7 @@
|
|||
// +build !cl12
|
||||
|
||||
package cl
|
||||
|
||||
func (k *Kernel) ArgName(index int) (string, error) {
|
||||
return "", ErrUnsupported
|
||||
}
|
20
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel12.go
generated
vendored
Normal file
20
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel12.go
generated
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
// +build cl12
|
||||
|
||||
package cl
|
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C"
|
||||
import "unsafe"
|
||||
|
||||
func (k *Kernel) ArgName(index int) (string, error) {
|
||||
var strC [1024]byte
|
||||
var strN C.size_t
|
||||
if err := C.clGetKernelArgInfo(k.clKernel, C.cl_uint(index), C.CL_KERNEL_ARG_NAME, 1024, unsafe.Pointer(&strC[0]), &strN); err != C.CL_SUCCESS {
|
||||
return "", toError(err)
|
||||
}
|
||||
return string(strC[:strN]), nil
|
||||
}
|
83
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/platform.go
generated
vendored
Normal file
83
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/platform.go
generated
vendored
Normal file
|
@ -0,0 +1,83 @@
|
|||
package cl
|
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C"
|
||||
|
||||
import "unsafe"
|
||||
|
||||
const maxPlatforms = 32
|
||||
|
||||
type Platform struct {
|
||||
id C.cl_platform_id
|
||||
}
|
||||
|
||||
// Obtain the list of platforms available.
|
||||
func GetPlatforms() ([]*Platform, error) {
|
||||
var platformIds [maxPlatforms]C.cl_platform_id
|
||||
var nPlatforms C.cl_uint
|
||||
if err := C.clGetPlatformIDs(C.cl_uint(maxPlatforms), &platformIds[0], &nPlatforms); err != C.CL_SUCCESS {
|
||||
return nil, toError(err)
|
||||
}
|
||||
platforms := make([]*Platform, nPlatforms)
|
||||
for i := 0; i < int(nPlatforms); i++ {
|
||||
platforms[i] = &Platform{id: platformIds[i]}
|
||||
}
|
||||
return platforms, nil
|
||||
}
|
||||
|
||||
func (p *Platform) GetDevices(deviceType DeviceType) ([]*Device, error) {
|
||||
return GetDevices(p, deviceType)
|
||||
}
|
||||
|
||||
func (p *Platform) getInfoString(param C.cl_platform_info) (string, error) {
|
||||
var strC [2048]byte
|
||||
var strN C.size_t
|
||||
if err := C.clGetPlatformInfo(p.id, param, 2048, unsafe.Pointer(&strC[0]), &strN); err != C.CL_SUCCESS {
|
||||
return "", toError(err)
|
||||
}
|
||||
return string(strC[:(strN - 1)]), nil
|
||||
}
|
||||
|
||||
func (p *Platform) Name() string {
|
||||
if str, err := p.getInfoString(C.CL_PLATFORM_NAME); err != nil {
|
||||
panic("Platform.Name() should never fail")
|
||||
} else {
|
||||
return str
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Platform) Vendor() string {
|
||||
if str, err := p.getInfoString(C.CL_PLATFORM_VENDOR); err != nil {
|
||||
panic("Platform.Vendor() should never fail")
|
||||
} else {
|
||||
return str
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Platform) Profile() string {
|
||||
if str, err := p.getInfoString(C.CL_PLATFORM_PROFILE); err != nil {
|
||||
panic("Platform.Profile() should never fail")
|
||||
} else {
|
||||
return str
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Platform) Version() string {
|
||||
if str, err := p.getInfoString(C.CL_PLATFORM_VERSION); err != nil {
|
||||
panic("Platform.Version() should never fail")
|
||||
} else {
|
||||
return str
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Platform) Extensions() string {
|
||||
if str, err := p.getInfoString(C.CL_PLATFORM_EXTENSIONS); err != nil {
|
||||
panic("Platform.Extensions() should never fail")
|
||||
} else {
|
||||
return str
|
||||
}
|
||||
}
|
105
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/program.go
generated
vendored
Normal file
105
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/program.go
generated
vendored
Normal file
|
@ -0,0 +1,105 @@
|
|||
package cl
|
||||
|
||||
// #include <stdlib.h>
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
type BuildError struct {
|
||||
Message string
|
||||
Device *Device
|
||||
}
|
||||
|
||||
func (e BuildError) Error() string {
|
||||
if e.Device != nil {
|
||||
return fmt.Sprintf("cl: build error on %q: %s", e.Device.Name(), e.Message)
|
||||
} else {
|
||||
return fmt.Sprintf("cl: build error: %s", e.Message)
|
||||
}
|
||||
}
|
||||
|
||||
type Program struct {
|
||||
clProgram C.cl_program
|
||||
devices []*Device
|
||||
}
|
||||
|
||||
func releaseProgram(p *Program) {
|
||||
if p.clProgram != nil {
|
||||
C.clReleaseProgram(p.clProgram)
|
||||
p.clProgram = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Program) Release() {
|
||||
releaseProgram(p)
|
||||
}
|
||||
|
||||
func (p *Program) BuildProgram(devices []*Device, options string) error {
|
||||
var cOptions *C.char
|
||||
if options != "" {
|
||||
cOptions = C.CString(options)
|
||||
defer C.free(unsafe.Pointer(cOptions))
|
||||
}
|
||||
var deviceList []C.cl_device_id
|
||||
var deviceListPtr *C.cl_device_id
|
||||
numDevices := C.cl_uint(len(devices))
|
||||
if devices != nil && len(devices) > 0 {
|
||||
deviceList = buildDeviceIdList(devices)
|
||||
deviceListPtr = &deviceList[0]
|
||||
}
|
||||
if err := C.clBuildProgram(p.clProgram, numDevices, deviceListPtr, cOptions, nil, nil); err != C.CL_SUCCESS {
|
||||
buffer := make([]byte, 4096)
|
||||
var bLen C.size_t
|
||||
var err C.cl_int
|
||||
|
||||
for _, dev := range p.devices {
|
||||
for i := 2; i >= 0; i-- {
|
||||
err = C.clGetProgramBuildInfo(p.clProgram, dev.id, C.CL_PROGRAM_BUILD_LOG, C.size_t(len(buffer)), unsafe.Pointer(&buffer[0]), &bLen)
|
||||
if err == C.CL_INVALID_VALUE && i > 0 && bLen < 1024*1024 {
|
||||
// INVALID_VALUE probably means our buffer isn't large enough
|
||||
buffer = make([]byte, bLen)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
if err != C.CL_SUCCESS {
|
||||
return toError(err)
|
||||
}
|
||||
|
||||
if bLen > 1 {
|
||||
return BuildError{
|
||||
Device: dev,
|
||||
Message: string(buffer[:bLen-1]),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return BuildError{
|
||||
Device: nil,
|
||||
Message: "build failed and produced no log entries",
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Program) CreateKernel(name string) (*Kernel, error) {
|
||||
cName := C.CString(name)
|
||||
defer C.free(unsafe.Pointer(cName))
|
||||
var err C.cl_int
|
||||
clKernel := C.clCreateKernel(p.clProgram, cName, &err)
|
||||
if err != C.CL_SUCCESS {
|
||||
return nil, toError(err)
|
||||
}
|
||||
kernel := &Kernel{clKernel: clKernel, name: name}
|
||||
runtime.SetFinalizer(kernel, releaseKernel)
|
||||
return kernel, nil
|
||||
}
|
193
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/queue.go
generated
vendored
Normal file
193
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/queue.go
generated
vendored
Normal file
|
@ -0,0 +1,193 @@
|
|||
package cl
|
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C"
|
||||
|
||||
import "unsafe"
|
||||
|
||||
type CommandQueueProperty int
|
||||
|
||||
const (
|
||||
CommandQueueOutOfOrderExecModeEnable CommandQueueProperty = C.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
|
||||
CommandQueueProfilingEnable CommandQueueProperty = C.CL_QUEUE_PROFILING_ENABLE
|
||||
)
|
||||
|
||||
type CommandQueue struct {
|
||||
clQueue C.cl_command_queue
|
||||
device *Device
|
||||
}
|
||||
|
||||
func releaseCommandQueue(q *CommandQueue) {
|
||||
if q.clQueue != nil {
|
||||
C.clReleaseCommandQueue(q.clQueue)
|
||||
q.clQueue = nil
|
||||
}
|
||||
}
|
||||
|
||||
// Call clReleaseCommandQueue on the CommandQueue. Using the CommandQueue after Release will cause a panick.
|
||||
func (q *CommandQueue) Release() {
|
||||
releaseCommandQueue(q)
|
||||
}
|
||||
|
||||
// Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.
|
||||
func (q *CommandQueue) Finish() error {
|
||||
return toError(C.clFinish(q.clQueue))
|
||||
}
|
||||
|
||||
// Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.
|
||||
func (q *CommandQueue) Flush() error {
|
||||
return toError(C.clFlush(q.clQueue))
|
||||
}
|
||||
|
||||
// Enqueues a command to map a region of the buffer object given by buffer into the host address space and returns a pointer to this mapped region.
|
||||
func (q *CommandQueue) EnqueueMapBuffer(buffer *MemObject, blocking bool, flags MapFlag, offset, size int, eventWaitList []*Event) (*MappedMemObject, *Event, error) {
|
||||
var event C.cl_event
|
||||
var err C.cl_int
|
||||
ptr := C.clEnqueueMapBuffer(q.clQueue, buffer.clMem, clBool(blocking), flags.toCl(), C.size_t(offset), C.size_t(size), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event, &err)
|
||||
if err != C.CL_SUCCESS {
|
||||
return nil, nil, toError(err)
|
||||
}
|
||||
ev := newEvent(event)
|
||||
if ptr == nil {
|
||||
return nil, ev, ErrUnknown
|
||||
}
|
||||
return &MappedMemObject{ptr: ptr, size: size}, ev, nil
|
||||
}
|
||||
|
||||
// Enqueues a command to map a region of an image object into the host address space and returns a pointer to this mapped region.
|
||||
func (q *CommandQueue) EnqueueMapImage(buffer *MemObject, blocking bool, flags MapFlag, origin, region [3]int, eventWaitList []*Event) (*MappedMemObject, *Event, error) {
|
||||
cOrigin := sizeT3(origin)
|
||||
cRegion := sizeT3(region)
|
||||
var event C.cl_event
|
||||
var err C.cl_int
|
||||
var rowPitch, slicePitch C.size_t
|
||||
ptr := C.clEnqueueMapImage(q.clQueue, buffer.clMem, clBool(blocking), flags.toCl(), &cOrigin[0], &cRegion[0], &rowPitch, &slicePitch, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event, &err)
|
||||
if err != C.CL_SUCCESS {
|
||||
return nil, nil, toError(err)
|
||||
}
|
||||
ev := newEvent(event)
|
||||
if ptr == nil {
|
||||
return nil, ev, ErrUnknown
|
||||
}
|
||||
size := 0 // TODO: could calculate this
|
||||
return &MappedMemObject{ptr: ptr, size: size, rowPitch: int(rowPitch), slicePitch: int(slicePitch)}, ev, nil
|
||||
}
|
||||
|
||||
// Enqueues a command to unmap a previously mapped region of a memory object.
|
||||
func (q *CommandQueue) EnqueueUnmapMemObject(buffer *MemObject, mappedObj *MappedMemObject, eventWaitList []*Event) (*Event, error) {
|
||||
var event C.cl_event
|
||||
if err := C.clEnqueueUnmapMemObject(q.clQueue, buffer.clMem, mappedObj.ptr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event); err != C.CL_SUCCESS {
|
||||
return nil, toError(err)
|
||||
}
|
||||
return newEvent(event), nil
|
||||
}
|
||||
|
||||
// Enqueues a command to copy a buffer object to another buffer object.
|
||||
func (q *CommandQueue) EnqueueCopyBuffer(srcBuffer, dstBuffer *MemObject, srcOffset, dstOffset, byteCount int, eventWaitList []*Event) (*Event, error) {
|
||||
var event C.cl_event
|
||||
err := toError(C.clEnqueueCopyBuffer(q.clQueue, srcBuffer.clMem, dstBuffer.clMem, C.size_t(srcOffset), C.size_t(dstOffset), C.size_t(byteCount), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
|
||||
return newEvent(event), err
|
||||
}
|
||||
|
||||
// Enqueue commands to write to a buffer object from host memory.
|
||||
func (q *CommandQueue) EnqueueWriteBuffer(buffer *MemObject, blocking bool, offset, dataSize int, dataPtr unsafe.Pointer, eventWaitList []*Event) (*Event, error) {
|
||||
var event C.cl_event
|
||||
err := toError(C.clEnqueueWriteBuffer(q.clQueue, buffer.clMem, clBool(blocking), C.size_t(offset), C.size_t(dataSize), dataPtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
|
||||
return newEvent(event), err
|
||||
}
|
||||
|
||||
func (q *CommandQueue) EnqueueWriteBufferFloat32(buffer *MemObject, blocking bool, offset int, data []float32, eventWaitList []*Event) (*Event, error) {
|
||||
dataPtr := unsafe.Pointer(&data[0])
|
||||
dataSize := int(unsafe.Sizeof(data[0])) * len(data)
|
||||
return q.EnqueueWriteBuffer(buffer, blocking, offset, dataSize, dataPtr, eventWaitList)
|
||||
}
|
||||
|
||||
// Enqueue commands to read from a buffer object to host memory.
|
||||
func (q *CommandQueue) EnqueueReadBuffer(buffer *MemObject, blocking bool, offset, dataSize int, dataPtr unsafe.Pointer, eventWaitList []*Event) (*Event, error) {
|
||||
var event C.cl_event
|
||||
err := toError(C.clEnqueueReadBuffer(q.clQueue, buffer.clMem, clBool(blocking), C.size_t(offset), C.size_t(dataSize), dataPtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
|
||||
return newEvent(event), err
|
||||
}
|
||||
|
||||
func (q *CommandQueue) EnqueueReadBufferFloat32(buffer *MemObject, blocking bool, offset int, data []float32, eventWaitList []*Event) (*Event, error) {
|
||||
dataPtr := unsafe.Pointer(&data[0])
|
||||
dataSize := int(unsafe.Sizeof(data[0])) * len(data)
|
||||
return q.EnqueueReadBuffer(buffer, blocking, offset, dataSize, dataPtr, eventWaitList)
|
||||
}
|
||||
|
||||
// Enqueues a command to execute a kernel on a device.
|
||||
func (q *CommandQueue) EnqueueNDRangeKernel(kernel *Kernel, globalWorkOffset, globalWorkSize, localWorkSize []int, eventWaitList []*Event) (*Event, error) {
|
||||
workDim := len(globalWorkSize)
|
||||
var globalWorkOffsetList []C.size_t
|
||||
var globalWorkOffsetPtr *C.size_t
|
||||
if globalWorkOffset != nil {
|
||||
globalWorkOffsetList = make([]C.size_t, len(globalWorkOffset))
|
||||
for i, off := range globalWorkOffset {
|
||||
globalWorkOffsetList[i] = C.size_t(off)
|
||||
}
|
||||
globalWorkOffsetPtr = &globalWorkOffsetList[0]
|
||||
}
|
||||
var globalWorkSizeList []C.size_t
|
||||
var globalWorkSizePtr *C.size_t
|
||||
if globalWorkSize != nil {
|
||||
globalWorkSizeList = make([]C.size_t, len(globalWorkSize))
|
||||
for i, off := range globalWorkSize {
|
||||
globalWorkSizeList[i] = C.size_t(off)
|
||||
}
|
||||
globalWorkSizePtr = &globalWorkSizeList[0]
|
||||
}
|
||||
var localWorkSizeList []C.size_t
|
||||
var localWorkSizePtr *C.size_t
|
||||
if localWorkSize != nil {
|
||||
localWorkSizeList = make([]C.size_t, len(localWorkSize))
|
||||
for i, off := range localWorkSize {
|
||||
localWorkSizeList[i] = C.size_t(off)
|
||||
}
|
||||
localWorkSizePtr = &localWorkSizeList[0]
|
||||
}
|
||||
var event C.cl_event
|
||||
err := toError(C.clEnqueueNDRangeKernel(q.clQueue, kernel.clKernel, C.cl_uint(workDim), globalWorkOffsetPtr, globalWorkSizePtr, localWorkSizePtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
|
||||
return newEvent(event), err
|
||||
}
|
||||
|
||||
// Enqueues a command to read from a 2D or 3D image object to host memory.
|
||||
func (q *CommandQueue) EnqueueReadImage(image *MemObject, blocking bool, origin, region [3]int, rowPitch, slicePitch int, data []byte, eventWaitList []*Event) (*Event, error) {
|
||||
cOrigin := sizeT3(origin)
|
||||
cRegion := sizeT3(region)
|
||||
var event C.cl_event
|
||||
err := toError(C.clEnqueueReadImage(q.clQueue, image.clMem, clBool(blocking), &cOrigin[0], &cRegion[0], C.size_t(rowPitch), C.size_t(slicePitch), unsafe.Pointer(&data[0]), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
|
||||
return newEvent(event), err
|
||||
}
|
||||
|
||||
// Enqueues a command to write from a 2D or 3D image object to host memory.
|
||||
func (q *CommandQueue) EnqueueWriteImage(image *MemObject, blocking bool, origin, region [3]int, rowPitch, slicePitch int, data []byte, eventWaitList []*Event) (*Event, error) {
|
||||
cOrigin := sizeT3(origin)
|
||||
cRegion := sizeT3(region)
|
||||
var event C.cl_event
|
||||
err := toError(C.clEnqueueWriteImage(q.clQueue, image.clMem, clBool(blocking), &cOrigin[0], &cRegion[0], C.size_t(rowPitch), C.size_t(slicePitch), unsafe.Pointer(&data[0]), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
|
||||
return newEvent(event), err
|
||||
}
|
||||
|
||||
func (q *CommandQueue) EnqueueFillBuffer(buffer *MemObject, pattern unsafe.Pointer, patternSize, offset, size int, eventWaitList []*Event) (*Event, error) {
|
||||
var event C.cl_event
|
||||
err := toError(C.clEnqueueFillBuffer(q.clQueue, buffer.clMem, pattern, C.size_t(patternSize), C.size_t(offset), C.size_t(size), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
|
||||
return newEvent(event), err
|
||||
}
|
||||
|
||||
// A synchronization point that enqueues a barrier operation.
|
||||
func (q *CommandQueue) EnqueueBarrierWithWaitList(eventWaitList []*Event) (*Event, error) {
|
||||
var event C.cl_event
|
||||
err := toError(C.clEnqueueBarrierWithWaitList(q.clQueue, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
|
||||
return newEvent(event), err
|
||||
}
|
||||
|
||||
// Enqueues a marker command which waits for either a list of events to complete, or all previously enqueued commands to complete.
|
||||
func (q *CommandQueue) EnqueueMarkerWithWaitList(eventWaitList []*Event) (*Event, error) {
|
||||
var event C.cl_event
|
||||
err := toError(C.clEnqueueMarkerWithWaitList(q.clQueue, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
|
||||
return newEvent(event), err
|
||||
}
|
487
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types.go
generated
vendored
Normal file
487
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types.go
generated
vendored
Normal file
|
@ -0,0 +1,487 @@
|
|||
package cl
|
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"strings"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// ErrUnknown is generally an unexpected result from an OpenCL function (e.g.
// CL_SUCCESS but null pointer).
var ErrUnknown = errors.New("cl: unknown error")

// ErrOther is an error that carries a bare integer code.
// NOTE(review): presumably used for OpenCL status codes without a dedicated
// sentinel error; confirm against toError.
type ErrOther int

// Error renders the numeric code in decimal.
func (e ErrOther) Error() string {
	code := int(e)
	return fmt.Sprintf("cl: error %d", code)
}
|
||||
|
||||
// Sentinel errors, one per named OpenCL status condition. Each is a distinct
// value, so callers can compare a returned error against them directly.
var (
	ErrDeviceNotFound                     = errors.New("cl: Device Not Found")
	ErrDeviceNotAvailable                 = errors.New("cl: Device Not Available")
	ErrCompilerNotAvailable               = errors.New("cl: Compiler Not Available")
	ErrMemObjectAllocationFailure         = errors.New("cl: Mem Object Allocation Failure")
	ErrOutOfResources                     = errors.New("cl: Out Of Resources")
	ErrOutOfHostMemory                    = errors.New("cl: Out Of Host Memory")
	ErrProfilingInfoNotAvailable          = errors.New("cl: Profiling Info Not Available")
	ErrMemCopyOverlap                     = errors.New("cl: Mem Copy Overlap")
	ErrImageFormatMismatch                = errors.New("cl: Image Format Mismatch")
	ErrImageFormatNotSupported            = errors.New("cl: Image Format Not Supported")
	ErrBuildProgramFailure                = errors.New("cl: Build Program Failure")
	ErrMapFailure                         = errors.New("cl: Map Failure")
	ErrMisalignedSubBufferOffset          = errors.New("cl: Misaligned Sub Buffer Offset")
	ErrExecStatusErrorForEventsInWaitList = errors.New("cl: Exec Status Error For Events In Wait List")
	ErrCompileProgramFailure              = errors.New("cl: Compile Program Failure")
	ErrLinkerNotAvailable                 = errors.New("cl: Linker Not Available")
	ErrLinkProgramFailure                 = errors.New("cl: Link Program Failure")
	ErrDevicePartitionFailed              = errors.New("cl: Device Partition Failed")
	ErrKernelArgInfoNotAvailable          = errors.New("cl: Kernel Arg Info Not Available")
	ErrInvalidValue                       = errors.New("cl: Invalid Value")
	ErrInvalidDeviceType                  = errors.New("cl: Invalid Device Type")
	ErrInvalidPlatform                    = errors.New("cl: Invalid Platform")
	ErrInvalidDevice                      = errors.New("cl: Invalid Device")
	ErrInvalidContext                     = errors.New("cl: Invalid Context")
	ErrInvalidQueueProperties             = errors.New("cl: Invalid Queue Properties")
	ErrInvalidCommandQueue                = errors.New("cl: Invalid Command Queue")
	ErrInvalidHostPtr                     = errors.New("cl: Invalid Host Ptr")
	ErrInvalidMemObject                   = errors.New("cl: Invalid Mem Object")
	ErrInvalidImageFormatDescriptor       = errors.New("cl: Invalid Image Format Descriptor")
	ErrInvalidImageSize                   = errors.New("cl: Invalid Image Size")
	ErrInvalidSampler                     = errors.New("cl: Invalid Sampler")
	ErrInvalidBinary                      = errors.New("cl: Invalid Binary")
	ErrInvalidBuildOptions                = errors.New("cl: Invalid Build Options")
	ErrInvalidProgram                     = errors.New("cl: Invalid Program")
	ErrInvalidProgramExecutable           = errors.New("cl: Invalid Program Executable")
	ErrInvalidKernelName                  = errors.New("cl: Invalid Kernel Name")
	ErrInvalidKernelDefinition            = errors.New("cl: Invalid Kernel Definition")
	ErrInvalidKernel                      = errors.New("cl: Invalid Kernel")
	ErrInvalidArgIndex                    = errors.New("cl: Invalid Arg Index")
	ErrInvalidArgValue                    = errors.New("cl: Invalid Arg Value")
	ErrInvalidArgSize                     = errors.New("cl: Invalid Arg Size")
	ErrInvalidKernelArgs                  = errors.New("cl: Invalid Kernel Args")
	ErrInvalidWorkDimension               = errors.New("cl: Invalid Work Dimension")
	ErrInvalidWorkGroupSize               = errors.New("cl: Invalid Work Group Size")
	ErrInvalidWorkItemSize                = errors.New("cl: Invalid Work Item Size")
	ErrInvalidGlobalOffset                = errors.New("cl: Invalid Global Offset")
	ErrInvalidEventWaitList               = errors.New("cl: Invalid Event Wait List")
	ErrInvalidEvent                       = errors.New("cl: Invalid Event")
	ErrInvalidOperation                   = errors.New("cl: Invalid Operation")
	ErrInvalidGlObject                    = errors.New("cl: Invalid Gl Object")
	ErrInvalidBufferSize                  = errors.New("cl: Invalid Buffer Size")
	ErrInvalidMipLevel                    = errors.New("cl: Invalid Mip Level")
	ErrInvalidGlobalWorkSize              = errors.New("cl: Invalid Global Work Size")
	ErrInvalidProperty                    = errors.New("cl: Invalid Property")
	ErrInvalidImageDescriptor             = errors.New("cl: Invalid Image Descriptor")
	ErrInvalidCompilerOptions             = errors.New("cl: Invalid Compiler Options")
	ErrInvalidLinkerOptions               = errors.New("cl: Invalid Linker Options")
	ErrInvalidDevicePartitionCount        = errors.New("cl: Invalid Device Partition Count")
)
|
||||
var errorMap = map[C.cl_int]error{
|
||||
C.CL_SUCCESS: nil,
|
||||
C.CL_DEVICE_NOT_FOUND: ErrDeviceNotFound,
|
||||
C.CL_DEVICE_NOT_AVAILABLE: ErrDeviceNotAvailable,
|
||||
C.CL_COMPILER_NOT_AVAILABLE: ErrCompilerNotAvailable,
|
||||
C.CL_MEM_OBJECT_ALLOCATION_FAILURE: ErrMemObjectAllocationFailure,
|
||||
C.CL_OUT_OF_RESOURCES: ErrOutOfResources,
|
||||
C.CL_OUT_OF_HOST_MEMORY: ErrOutOfHostMemory,
|
||||
C.CL_PROFILING_INFO_NOT_AVAILABLE: ErrProfilingInfoNotAvailable,
|
||||
C.CL_MEM_COPY_OVERLAP: ErrMemCopyOverlap,
|
||||
C.CL_IMAGE_FORMAT_MISMATCH: ErrImageFormatMismatch,
|
||||
C.CL_IMAGE_FORMAT_NOT_SUPPORTED: ErrImageFormatNotSupported,
|
||||
C.CL_BUILD_PROGRAM_FAILURE: ErrBuildProgramFailure,
|
||||
C.CL_MAP_FAILURE: ErrMapFailure,
|
||||
C.CL_MISALIGNED_SUB_BUFFER_OFFSET: ErrMisalignedSubBufferOffset,
|
||||
C.CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: ErrExecStatusErrorForEventsInWaitList,
|
||||
C.CL_INVALID_VALUE: ErrInvalidValue,
|
||||
C.CL_INVALID_DEVICE_TYPE: ErrInvalidDeviceType,
|
||||
C.CL_INVALID_PLATFORM: ErrInvalidPlatform,
|
||||
C.CL_INVALID_DEVICE: ErrInvalidDevice,
|
||||
C.CL_INVALID_CONTEXT: ErrInvalidContext,
|
||||
C.CL_INVALID_QUEUE_PROPERTIES: ErrInvalidQueueProperties,
|
||||
C.CL_INVALID_COMMAND_QUEUE: ErrInvalidCommandQueue,
|
||||
C.CL_INVALID_HOST_PTR: ErrInvalidHostPtr,
|
||||
C.CL_INVALID_MEM_OBJECT: ErrInvalidMemObject,
|
||||
C.CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: ErrInvalidImageFormatDescriptor,
|
||||
C.CL_INVALID_IMAGE_SIZE: ErrInvalidImageSize,
|
||||
C.CL_INVALID_SAMPLER: ErrInvalidSampler,
|
||||
C.CL_INVALID_BINARY: ErrInvalidBinary,
|
||||
C.CL_INVALID_BUILD_OPTIONS: ErrInvalidBuildOptions,
|
||||
C.CL_INVALID_PROGRAM: ErrInvalidProgram,
|
||||
C.CL_INVALID_PROGRAM_EXECUTABLE: ErrInvalidProgramExecutable,
|
||||
C.CL_INVALID_KERNEL_NAME: ErrInvalidKernelName,
|
||||
C.CL_INVALID_KERNEL_DEFINITION: ErrInvalidKernelDefinition,
|
||||
C.CL_INVALID_KERNEL: ErrInvalidKernel,
|
||||
C.CL_INVALID_ARG_INDEX: ErrInvalidArgIndex,
|
||||
C.CL_INVALID_ARG_VALUE: ErrInvalidArgValue,
|
||||
C.CL_INVALID_ARG_SIZE: ErrInvalidArgSize,
|
||||
C.CL_INVALID_KERNEL_ARGS: ErrInvalidKernelArgs,
|
||||
C.CL_INVALID_WORK_DIMENSION: ErrInvalidWorkDimension,
|
||||
C.CL_INVALID_WORK_GROUP_SIZE: ErrInvalidWorkGroupSize,
|
||||
C.CL_INVALID_WORK_ITEM_SIZE: ErrInvalidWorkItemSize,
|
||||
C.CL_INVALID_GLOBAL_OFFSET: ErrInvalidGlobalOffset,
|
||||
C.CL_INVALID_EVENT_WAIT_LIST: ErrInvalidEventWaitList,
|
||||
C.CL_INVALID_EVENT: ErrInvalidEvent,
|
||||
C.CL_INVALID_OPERATION: ErrInvalidOperation,
|
||||
C.CL_INVALID_GL_OBJECT: ErrInvalidGlObject,
|
||||
C.CL_INVALID_BUFFER_SIZE: ErrInvalidBufferSize,
|
||||
C.CL_INVALID_MIP_LEVEL: ErrInvalidMipLevel,
|
||||
C.CL_INVALID_GLOBAL_WORK_SIZE: ErrInvalidGlobalWorkSize,
|
||||
C.CL_INVALID_PROPERTY: ErrInvalidProperty,
|
||||
}
|
||||
|
||||
func toError(code C.cl_int) error {
|
||||
if err, ok := errorMap[code]; ok {
|
||||
return err
|
||||
}
|
||||
return ErrOther(code)
|
||||
}
|
||||
|
||||
type LocalMemType int
|
||||
|
||||
const (
|
||||
LocalMemTypeNone LocalMemType = C.CL_NONE
|
||||
LocalMemTypeGlobal LocalMemType = C.CL_GLOBAL
|
||||
LocalMemTypeLocal LocalMemType = C.CL_LOCAL
|
||||
)
|
||||
|
||||
var localMemTypeMap = map[LocalMemType]string{
|
||||
LocalMemTypeNone: "None",
|
||||
LocalMemTypeGlobal: "Global",
|
||||
LocalMemTypeLocal: "Local",
|
||||
}
|
||||
|
||||
func (t LocalMemType) String() string {
|
||||
name := localMemTypeMap[t]
|
||||
if name == "" {
|
||||
name = "Unknown"
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
type ExecCapability int
|
||||
|
||||
const (
|
||||
ExecCapabilityKernel ExecCapability = C.CL_EXEC_KERNEL // The OpenCL device can execute OpenCL kernels.
|
||||
ExecCapabilityNativeKernel ExecCapability = C.CL_EXEC_NATIVE_KERNEL // The OpenCL device can execute native kernels.
|
||||
)
|
||||
|
||||
func (ec ExecCapability) String() string {
|
||||
var parts []string
|
||||
if ec&ExecCapabilityKernel != 0 {
|
||||
parts = append(parts, "Kernel")
|
||||
}
|
||||
if ec&ExecCapabilityNativeKernel != 0 {
|
||||
parts = append(parts, "NativeKernel")
|
||||
}
|
||||
if parts == nil {
|
||||
return ""
|
||||
}
|
||||
return strings.Join(parts, "|")
|
||||
}
|
||||
|
||||
type MemCacheType int
|
||||
|
||||
const (
|
||||
MemCacheTypeNone MemCacheType = C.CL_NONE
|
||||
MemCacheTypeReadOnlyCache MemCacheType = C.CL_READ_ONLY_CACHE
|
||||
MemCacheTypeReadWriteCache MemCacheType = C.CL_READ_WRITE_CACHE
|
||||
)
|
||||
|
||||
func (ct MemCacheType) String() string {
|
||||
switch ct {
|
||||
case MemCacheTypeNone:
|
||||
return "None"
|
||||
case MemCacheTypeReadOnlyCache:
|
||||
return "ReadOnly"
|
||||
case MemCacheTypeReadWriteCache:
|
||||
return "ReadWrite"
|
||||
}
|
||||
return fmt.Sprintf("Unknown(%x)", int(ct))
|
||||
}
|
||||
|
||||
type MemFlag int
|
||||
|
||||
const (
|
||||
MemReadWrite MemFlag = C.CL_MEM_READ_WRITE
|
||||
MemWriteOnly MemFlag = C.CL_MEM_WRITE_ONLY
|
||||
MemReadOnly MemFlag = C.CL_MEM_READ_ONLY
|
||||
MemUseHostPtr MemFlag = C.CL_MEM_USE_HOST_PTR
|
||||
MemAllocHostPtr MemFlag = C.CL_MEM_ALLOC_HOST_PTR
|
||||
MemCopyHostPtr MemFlag = C.CL_MEM_COPY_HOST_PTR
|
||||
|
||||
MemWriteOnlyHost MemFlag = C.CL_MEM_HOST_WRITE_ONLY
|
||||
MemReadOnlyHost MemFlag = C.CL_MEM_HOST_READ_ONLY
|
||||
MemNoAccessHost MemFlag = C.CL_MEM_HOST_NO_ACCESS
|
||||
)
|
||||
|
||||
type MemObjectType int
|
||||
|
||||
const (
|
||||
MemObjectTypeBuffer MemObjectType = C.CL_MEM_OBJECT_BUFFER
|
||||
MemObjectTypeImage2D MemObjectType = C.CL_MEM_OBJECT_IMAGE2D
|
||||
MemObjectTypeImage3D MemObjectType = C.CL_MEM_OBJECT_IMAGE3D
|
||||
)
|
||||
|
||||
type MapFlag int
|
||||
|
||||
const (
|
||||
// This flag specifies that the region being mapped in the memory object is being mapped for reading.
|
||||
MapFlagRead MapFlag = C.CL_MAP_READ
|
||||
MapFlagWrite MapFlag = C.CL_MAP_WRITE
|
||||
MapFlagWriteInvalidateRegion MapFlag = C.CL_MAP_WRITE_INVALIDATE_REGION
|
||||
)
|
||||
|
||||
func (mf MapFlag) toCl() C.cl_map_flags {
|
||||
return C.cl_map_flags(mf)
|
||||
}
|
||||
|
||||
type ChannelOrder int
|
||||
|
||||
const (
|
||||
ChannelOrderR ChannelOrder = C.CL_R
|
||||
ChannelOrderA ChannelOrder = C.CL_A
|
||||
ChannelOrderRG ChannelOrder = C.CL_RG
|
||||
ChannelOrderRA ChannelOrder = C.CL_RA
|
||||
ChannelOrderRGB ChannelOrder = C.CL_RGB
|
||||
ChannelOrderRGBA ChannelOrder = C.CL_RGBA
|
||||
ChannelOrderBGRA ChannelOrder = C.CL_BGRA
|
||||
ChannelOrderARGB ChannelOrder = C.CL_ARGB
|
||||
ChannelOrderIntensity ChannelOrder = C.CL_INTENSITY
|
||||
ChannelOrderLuminance ChannelOrder = C.CL_LUMINANCE
|
||||
ChannelOrderRx ChannelOrder = C.CL_Rx
|
||||
ChannelOrderRGx ChannelOrder = C.CL_RGx
|
||||
ChannelOrderRGBx ChannelOrder = C.CL_RGBx
|
||||
)
|
||||
|
||||
var channelOrderNameMap = map[ChannelOrder]string{
|
||||
ChannelOrderR: "R",
|
||||
ChannelOrderA: "A",
|
||||
ChannelOrderRG: "RG",
|
||||
ChannelOrderRA: "RA",
|
||||
ChannelOrderRGB: "RGB",
|
||||
ChannelOrderRGBA: "RGBA",
|
||||
ChannelOrderBGRA: "BGRA",
|
||||
ChannelOrderARGB: "ARGB",
|
||||
ChannelOrderIntensity: "Intensity",
|
||||
ChannelOrderLuminance: "Luminance",
|
||||
ChannelOrderRx: "Rx",
|
||||
ChannelOrderRGx: "RGx",
|
||||
ChannelOrderRGBx: "RGBx",
|
||||
}
|
||||
|
||||
func (co ChannelOrder) String() string {
|
||||
name := channelOrderNameMap[co]
|
||||
if name == "" {
|
||||
name = fmt.Sprintf("Unknown(%x)", int(co))
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
type ChannelDataType int
|
||||
|
||||
const (
|
||||
ChannelDataTypeSNormInt8 ChannelDataType = C.CL_SNORM_INT8
|
||||
ChannelDataTypeSNormInt16 ChannelDataType = C.CL_SNORM_INT16
|
||||
ChannelDataTypeUNormInt8 ChannelDataType = C.CL_UNORM_INT8
|
||||
ChannelDataTypeUNormInt16 ChannelDataType = C.CL_UNORM_INT16
|
||||
ChannelDataTypeUNormShort565 ChannelDataType = C.CL_UNORM_SHORT_565
|
||||
ChannelDataTypeUNormShort555 ChannelDataType = C.CL_UNORM_SHORT_555
|
||||
ChannelDataTypeUNormInt101010 ChannelDataType = C.CL_UNORM_INT_101010
|
||||
ChannelDataTypeSignedInt8 ChannelDataType = C.CL_SIGNED_INT8
|
||||
ChannelDataTypeSignedInt16 ChannelDataType = C.CL_SIGNED_INT16
|
||||
ChannelDataTypeSignedInt32 ChannelDataType = C.CL_SIGNED_INT32
|
||||
ChannelDataTypeUnsignedInt8 ChannelDataType = C.CL_UNSIGNED_INT8
|
||||
ChannelDataTypeUnsignedInt16 ChannelDataType = C.CL_UNSIGNED_INT16
|
||||
ChannelDataTypeUnsignedInt32 ChannelDataType = C.CL_UNSIGNED_INT32
|
||||
ChannelDataTypeHalfFloat ChannelDataType = C.CL_HALF_FLOAT
|
||||
ChannelDataTypeFloat ChannelDataType = C.CL_FLOAT
|
||||
)
|
||||
|
||||
var channelDataTypeNameMap = map[ChannelDataType]string{
|
||||
ChannelDataTypeSNormInt8: "SNormInt8",
|
||||
ChannelDataTypeSNormInt16: "SNormInt16",
|
||||
ChannelDataTypeUNormInt8: "UNormInt8",
|
||||
ChannelDataTypeUNormInt16: "UNormInt16",
|
||||
ChannelDataTypeUNormShort565: "UNormShort565",
|
||||
ChannelDataTypeUNormShort555: "UNormShort555",
|
||||
ChannelDataTypeUNormInt101010: "UNormInt101010",
|
||||
ChannelDataTypeSignedInt8: "SignedInt8",
|
||||
ChannelDataTypeSignedInt16: "SignedInt16",
|
||||
ChannelDataTypeSignedInt32: "SignedInt32",
|
||||
ChannelDataTypeUnsignedInt8: "UnsignedInt8",
|
||||
ChannelDataTypeUnsignedInt16: "UnsignedInt16",
|
||||
ChannelDataTypeUnsignedInt32: "UnsignedInt32",
|
||||
ChannelDataTypeHalfFloat: "HalfFloat",
|
||||
ChannelDataTypeFloat: "Float",
|
||||
}
|
||||
|
||||
func (ct ChannelDataType) String() string {
|
||||
name := channelDataTypeNameMap[ct]
|
||||
if name == "" {
|
||||
name = fmt.Sprintf("Unknown(%x)", int(ct))
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
type ImageFormat struct {
|
||||
ChannelOrder ChannelOrder
|
||||
ChannelDataType ChannelDataType
|
||||
}
|
||||
|
||||
func (f ImageFormat) toCl() C.cl_image_format {
|
||||
var format C.cl_image_format
|
||||
format.image_channel_order = C.cl_channel_order(f.ChannelOrder)
|
||||
format.image_channel_data_type = C.cl_channel_type(f.ChannelDataType)
|
||||
return format
|
||||
}
|
||||
|
||||
type ProfilingInfo int
|
||||
|
||||
const (
|
||||
// A 64-bit value that describes the current device time counter in
|
||||
// nanoseconds when the command identified by event is enqueued in
|
||||
// a command-queue by the host.
|
||||
ProfilingInfoCommandQueued ProfilingInfo = C.CL_PROFILING_COMMAND_QUEUED
|
||||
// A 64-bit value that describes the current device time counter in
|
||||
// nanoseconds when the command identified by event that has been
|
||||
// enqueued is submitted by the host to the device associated with the command-queue.
|
||||
ProfilingInfoCommandSubmit ProfilingInfo = C.CL_PROFILING_COMMAND_SUBMIT
|
||||
// A 64-bit value that describes the current device time counter in
|
||||
// nanoseconds when the command identified by event starts execution on the device.
|
||||
ProfilingInfoCommandStart ProfilingInfo = C.CL_PROFILING_COMMAND_START
|
||||
// A 64-bit value that describes the current device time counter in
|
||||
// nanoseconds when the command identified by event has finished
|
||||
// execution on the device.
|
||||
ProfilingInfoCommandEnd ProfilingInfo = C.CL_PROFILING_COMMAND_END
|
||||
)
|
||||
|
||||
type CommmandExecStatus int
|
||||
|
||||
const (
|
||||
CommmandExecStatusComplete CommmandExecStatus = C.CL_COMPLETE
|
||||
CommmandExecStatusRunning CommmandExecStatus = C.CL_RUNNING
|
||||
CommmandExecStatusSubmitted CommmandExecStatus = C.CL_SUBMITTED
|
||||
CommmandExecStatusQueued CommmandExecStatus = C.CL_QUEUED
|
||||
)
|
||||
|
||||
type Event struct {
|
||||
clEvent C.cl_event
|
||||
}
|
||||
|
||||
func releaseEvent(ev *Event) {
|
||||
if ev.clEvent != nil {
|
||||
C.clReleaseEvent(ev.clEvent)
|
||||
ev.clEvent = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Event) Release() {
|
||||
releaseEvent(e)
|
||||
}
|
||||
|
||||
func (e *Event) GetEventProfilingInfo(paramName ProfilingInfo) (int64, error) {
|
||||
var paramValue C.cl_ulong
|
||||
if err := C.clGetEventProfilingInfo(e.clEvent, C.cl_profiling_info(paramName), C.size_t(unsafe.Sizeof(paramValue)), unsafe.Pointer(¶mValue), nil); err != C.CL_SUCCESS {
|
||||
return 0, toError(err)
|
||||
}
|
||||
return int64(paramValue), nil
|
||||
}
|
||||
|
||||
// Sets the execution status of a user event object.
|
||||
//
|
||||
// `status` specifies the new execution status to be set and
|
||||
// can be CL_COMPLETE or a negative integer value to indicate
|
||||
// an error. A negative integer value causes all enqueued commands
|
||||
// that wait on this user event to be terminated. clSetUserEventStatus
|
||||
// can only be called once to change the execution status of event.
|
||||
func (e *Event) SetUserEventStatus(status int) error {
|
||||
return toError(C.clSetUserEventStatus(e.clEvent, C.cl_int(status)))
|
||||
}
|
||||
|
||||
// Waits on the host thread for commands identified by event objects in
|
||||
// events to complete. A command is considered complete if its execution
|
||||
// status is CL_COMPLETE or a negative value. The events specified in
|
||||
// event_list act as synchronization points.
|
||||
//
|
||||
// If the cl_khr_gl_event extension is enabled, event objects can also be
|
||||
// used to reflect the status of an OpenGL sync object. The sync object
|
||||
// in turn refers to a fence command executing in an OpenGL command
|
||||
// stream. This provides another method of coordinating sharing of buffers
|
||||
// and images between OpenGL and OpenCL.
|
||||
func WaitForEvents(events []*Event) error {
|
||||
return toError(C.clWaitForEvents(C.cl_uint(len(events)), eventListPtr(events)))
|
||||
}
|
||||
|
||||
func newEvent(clEvent C.cl_event) *Event {
|
||||
ev := &Event{clEvent: clEvent}
|
||||
runtime.SetFinalizer(ev, releaseEvent)
|
||||
return ev
|
||||
}
|
||||
|
||||
func eventListPtr(el []*Event) *C.cl_event {
|
||||
if el == nil {
|
||||
return nil
|
||||
}
|
||||
elist := make([]C.cl_event, len(el))
|
||||
for i, e := range el {
|
||||
elist[i] = e.clEvent
|
||||
}
|
||||
return (*C.cl_event)(&elist[0])
|
||||
}
|
||||
|
||||
func clBool(b bool) C.cl_bool {
|
||||
if b {
|
||||
return C.CL_TRUE
|
||||
}
|
||||
return C.CL_FALSE
|
||||
}
|
||||
|
||||
func sizeT3(i3 [3]int) [3]C.size_t {
|
||||
var val [3]C.size_t
|
||||
val[0] = C.size_t(i3[0])
|
||||
val[1] = C.size_t(i3[1])
|
||||
val[2] = C.size_t(i3[2])
|
||||
return val
|
||||
}
|
||||
|
||||
// MappedMemObject records a mapped memory region: its base pointer, its size
// and the row and slice pitch values stored alongside it.
type MappedMemObject struct {
	ptr        unsafe.Pointer
	size       int
	rowPitch   int
	slicePitch int
}

// ByteSlice returns a []byte whose length and capacity are both Size() and
// whose backing storage is the mapped region itself; no data is copied, so
// writes through the slice modify the mapped memory.
func (mb *MappedMemObject) ByteSlice() []byte {
	var view []byte
	// Point the slice header directly at the mapped region.
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&view))
	hdr.Data = uintptr(mb.ptr)
	hdr.Len = mb.size
	hdr.Cap = mb.size
	return view
}

// Ptr returns the base pointer of the mapped region.
func (mb *MappedMemObject) Ptr() unsafe.Pointer { return mb.ptr }

// Size returns the stored size of the mapped region.
func (mb *MappedMemObject) Size() int { return mb.size }

// RowPitch returns the stored row pitch.
func (mb *MappedMemObject) RowPitch() int { return mb.rowPitch }

// SlicePitch returns the stored slice pitch.
func (mb *MappedMemObject) SlicePitch() int { return mb.slicePitch }
|
71
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types12.go
generated
vendored
Normal file
71
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types12.go
generated
vendored
Normal file
|
@ -0,0 +1,71 @@
|
|||
// +build cl12
|
||||
|
||||
package cl
|
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C"
|
||||
|
||||
const (
|
||||
ChannelDataTypeUNormInt24 ChannelDataType = C.CL_UNORM_INT24
|
||||
ChannelOrderDepth ChannelOrder = C.CL_DEPTH
|
||||
ChannelOrderDepthStencil ChannelOrder = C.CL_DEPTH_STENCIL
|
||||
MemHostNoAccess MemFlag = C.CL_MEM_HOST_NO_ACCESS // OpenCL 1.2
|
||||
MemHostReadOnly MemFlag = C.CL_MEM_HOST_READ_ONLY // OpenCL 1.2
|
||||
MemHostWriteOnly MemFlag = C.CL_MEM_HOST_WRITE_ONLY // OpenCL 1.2
|
||||
MemObjectTypeImage1D MemObjectType = C.CL_MEM_OBJECT_IMAGE1D
|
||||
MemObjectTypeImage1DArray MemObjectType = C.CL_MEM_OBJECT_IMAGE1D_ARRAY
|
||||
MemObjectTypeImage1DBuffer MemObjectType = C.CL_MEM_OBJECT_IMAGE1D_BUFFER
|
||||
MemObjectTypeImage2DArray MemObjectType = C.CL_MEM_OBJECT_IMAGE2D_ARRAY
|
||||
// This flag specifies that the region being mapped in the memory object is being mapped for writing.
|
||||
//
|
||||
// The contents of the region being mapped are to be discarded. This is typically the case when the
|
||||
// region being mapped is overwritten by the host. This flag allows the implementation to no longer
|
||||
// guarantee that the pointer returned by clEnqueueMapBuffer or clEnqueueMapImage contains the
|
||||
// latest bits in the region being mapped which can be a significant performance enhancement.
|
||||
MapFlagWriteInvalidateRegion MapFlag = C.CL_MAP_WRITE_INVALIDATE_REGION
|
||||
)
|
||||
|
||||
func init() {
|
||||
errorMap[C.CL_COMPILE_PROGRAM_FAILURE] = ErrCompileProgramFailure
|
||||
errorMap[C.CL_DEVICE_PARTITION_FAILED] = ErrDevicePartitionFailed
|
||||
errorMap[C.CL_INVALID_COMPILER_OPTIONS] = ErrInvalidCompilerOptions
|
||||
errorMap[C.CL_INVALID_DEVICE_PARTITION_COUNT] = ErrInvalidDevicePartitionCount
|
||||
errorMap[C.CL_INVALID_IMAGE_DESCRIPTOR] = ErrInvalidImageDescriptor
|
||||
errorMap[C.CL_INVALID_LINKER_OPTIONS] = ErrInvalidLinkerOptions
|
||||
errorMap[C.CL_KERNEL_ARG_INFO_NOT_AVAILABLE] = ErrKernelArgInfoNotAvailable
|
||||
errorMap[C.CL_LINK_PROGRAM_FAILURE] = ErrLinkProgramFailure
|
||||
errorMap[C.CL_LINKER_NOT_AVAILABLE] = ErrLinkerNotAvailable
|
||||
channelOrderNameMap[ChannelOrderDepth] = "Depth"
|
||||
channelOrderNameMap[ChannelOrderDepthStencil] = "DepthStencil"
|
||||
channelDataTypeNameMap[ChannelDataTypeUNormInt24] = "UNormInt24"
|
||||
}
|
||||
|
||||
type ImageDescription struct {
|
||||
Type MemObjectType
|
||||
Width, Height, Depth int
|
||||
ArraySize, RowPitch, SlicePitch int
|
||||
NumMipLevels, NumSamples int
|
||||
Buffer *MemObject
|
||||
}
|
||||
|
||||
func (d ImageDescription) toCl() C.cl_image_desc {
|
||||
var desc C.cl_image_desc
|
||||
desc.image_type = C.cl_mem_object_type(d.Type)
|
||||
desc.image_width = C.size_t(d.Width)
|
||||
desc.image_height = C.size_t(d.Height)
|
||||
desc.image_depth = C.size_t(d.Depth)
|
||||
desc.image_array_size = C.size_t(d.ArraySize)
|
||||
desc.image_row_pitch = C.size_t(d.RowPitch)
|
||||
desc.image_slice_pitch = C.size_t(d.SlicePitch)
|
||||
desc.num_mip_levels = C.cl_uint(d.NumMipLevels)
|
||||
desc.num_samples = C.cl_uint(d.NumSamples)
|
||||
desc.buffer = nil
|
||||
if d.Buffer != nil {
|
||||
desc.buffer = d.Buffer.clMem
|
||||
}
|
||||
return desc
|
||||
}
|
45
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types_darwin.go
generated
vendored
Normal file
45
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types_darwin.go
generated
vendored
Normal file
|
@ -0,0 +1,45 @@
|
|||
package cl
|
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C"
|
||||
|
||||
// Extension: cl_APPLE_fixed_alpha_channel_orders
|
||||
//
|
||||
// These selectors may be passed to clCreateImage2D() in the cl_image_format.image_channel_order field.
|
||||
// They are like CL_BGRA and CL_ARGB except that the alpha channel to be ignored. On calls to read_imagef,
|
||||
// the alpha will be 0xff (1.0f) if the sample falls in the image and 0 if it does not fall in the image.
|
||||
// On calls to write_imagef, the alpha value is ignored and 0xff (1.0f) is written. These formats are
|
||||
// currently only available for the CL_UNORM_INT8 cl_channel_type. They are intended to support legacy
|
||||
// image formats.
|
||||
const (
|
||||
ChannelOrder1RGBApple ChannelOrder = C.CL_1RGB_APPLE // Introduced in MacOS X.7.
|
||||
ChannelOrderBGR1Apple ChannelOrder = C.CL_BGR1_APPLE // Introduced in MacOS X.7.
|
||||
)
|
||||
|
||||
// Extension: cl_APPLE_biased_fixed_point_image_formats
|
||||
//
|
||||
// This selector may be passed to clCreateImage2D() in the cl_image_format.image_channel_data_type field.
|
||||
// It defines a biased signed 1.14 fixed point storage format, with range [-1, 3). The conversion from
|
||||
// float to this fixed point format is defined as follows:
|
||||
//
|
||||
// ushort float_to_sfixed14( float x ){
|
||||
// int i = convert_int_sat_rte( x * 0x1.0p14f ); // scale [-1, 3.0) to [-16384, 3*16384), round to nearest integer
|
||||
// i = add_sat( i, 0x4000 ); // apply bias, to convert to [0, 65535) range
|
||||
// return convert_ushort_sat(i); // clamp to destination size
|
||||
// }
|
||||
//
|
||||
// The inverse conversion is the reverse process. The formats are currently only available on the CPU with
|
||||
// the CL_RGBA channel layout.
|
||||
const (
|
||||
ChannelDataTypeSFixed14Apple ChannelDataType = C.CL_SFIXED14_APPLE // Introduced in MacOS X.7.
|
||||
)
|
||||
|
||||
func init() {
|
||||
channelOrderNameMap[ChannelOrder1RGBApple] = "1RGBApple"
|
||||
channelOrderNameMap[ChannelOrderBGR1Apple] = "RGB1Apple"
|
||||
channelDataTypeNameMap[ChannelDataTypeSFixed14Apple] = "SFixed14Apple"
|
||||
}
|
|
@ -30,8 +30,8 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
minDifficulty = new(big.Int).Exp(big.NewInt(2), big.NewInt(256), big.NewInt(0))
|
||||
sharedLight = new(Light)
|
||||
maxUint256 = new(big.Int).Exp(big.NewInt(2), big.NewInt(256), big.NewInt(0))
|
||||
sharedLight = new(Light)
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -140,7 +140,7 @@ func (l *Light) Verify(block pow.Block) bool {
|
|||
// the finalizer before the call completes.
|
||||
_ = cache
|
||||
// The actual check.
|
||||
target := new(big.Int).Div(minDifficulty, difficulty)
|
||||
target := new(big.Int).Div(maxUint256, difficulty)
|
||||
return h256ToHash(ret.result).Big().Cmp(target) <= 0
|
||||
}
|
||||
|
||||
|
@ -199,7 +199,7 @@ func (d *dag) generate() {
|
|||
if d.dir == "" {
|
||||
d.dir = DefaultDir
|
||||
}
|
||||
glog.V(logger.Info).Infof("Generating DAG for epoch %d (%x)", d.epoch, seedHash)
|
||||
glog.V(logger.Info).Infof("Generating DAG for epoch %d (size %d) (%x)", d.epoch, dagSize, seedHash)
|
||||
// Generate a temporary cache.
|
||||
// TODO: this could share the cache with Light
|
||||
cache := C.ethash_light_new_internal(cacheSize, (*C.ethash_h256_t)(unsafe.Pointer(&seedHash[0])))
|
||||
|
@ -220,14 +220,18 @@ func (d *dag) generate() {
|
|||
})
|
||||
}
|
||||
|
||||
func freeDAG(h *dag) {
|
||||
C.ethash_full_delete(h.ptr)
|
||||
h.ptr = nil
|
||||
func freeDAG(d *dag) {
|
||||
C.ethash_full_delete(d.ptr)
|
||||
d.ptr = nil
|
||||
}
|
||||
|
||||
func (d *dag) Ptr() unsafe.Pointer {
|
||||
return unsafe.Pointer(d.ptr.data)
|
||||
}
|
||||
|
||||
//export ethashGoCallback
|
||||
func ethashGoCallback(percent C.unsigned) C.int {
|
||||
glog.V(logger.Info).Infof("Still generating DAG: %d%%", percent)
|
||||
glog.V(logger.Info).Infof("Generating DAG: %d%%", percent)
|
||||
return 0
|
||||
}
|
||||
|
||||
|
@ -273,7 +277,7 @@ func (pow *Full) getDAG(blockNum uint64) (d *dag) {
|
|||
return d
|
||||
}
|
||||
|
||||
func (pow *Full) Search(block pow.Block, stop <-chan struct{}) (nonce uint64, mixDigest []byte) {
|
||||
func (pow *Full) Search(block pow.Block, stop <-chan struct{}, index int) (nonce uint64, mixDigest []byte) {
|
||||
dag := pow.getDAG(block.NumberU64())
|
||||
|
||||
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
|
@ -286,7 +290,7 @@ func (pow *Full) Search(block pow.Block, stop <-chan struct{}) (nonce uint64, mi
|
|||
|
||||
nonce = uint64(r.Int63())
|
||||
hash := hashToH256(block.HashNoNonce())
|
||||
target := new(big.Int).Div(minDifficulty, diff)
|
||||
target := new(big.Int).Div(maxUint256, diff)
|
||||
for {
|
||||
select {
|
||||
case <-stop:
|
||||
|
|
629
Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl.go
generated
vendored
Normal file
629
Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl.go
generated
vendored
Normal file
|
@ -0,0 +1,629 @@
|
|||
// Copyright 2014 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// +build opencl
|
||||
|
||||
package ethash
|
||||
|
||||
//#cgo LDFLAGS: -w
|
||||
//#include <stdint.h>
|
||||
//#include <string.h>
|
||||
//#include "src/libethash/internal.h"
|
||||
import "C"
|
||||
|
||||
import (
|
||||
crand "crypto/rand"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
"math/big"
|
||||
mrand "math/rand"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/Gustav-Simonsson/go-opencl/cl"
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/pow"
|
||||
)
|
||||
|
||||
/*
|
||||
|
||||
This code has two main entry points:
|
||||
|
||||
1. The initCL(...) function configures one or more OpenCL device
|
||||
(for now only GPU) and loads the Ethash DAG onto device memory
|
||||
|
||||
2. The Search(...) function loads an Ethash nonce into device(s) memory and
|
||||
executes the Ethash OpenCL kernel.
|
||||
|
||||
Throughout the code, we refer to "host memory" and "device memory".
|
||||
For most systems (e.g. regular PC GPU miner) the host memory is RAM and
|
||||
device memory is the GPU global memory (e.g. GDDR5).
|
||||
|
||||
References mentioned in code comments:
|
||||
|
||||
1. https://github.com/ethereum/wiki/wiki/Ethash
|
||||
2. https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner.cpp
|
||||
3. https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/
|
||||
4. http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_OpenCL_Programming_User_Guide.pdf
|
||||
|
||||
*/
|
||||
|
||||
type OpenCLDevice struct {
|
||||
deviceId int
|
||||
device *cl.Device
|
||||
openCL11 bool // OpenCL version 1.1 and 1.2 are handled a bit different
|
||||
openCL12 bool
|
||||
|
||||
dagBuf *cl.MemObject // Ethash full DAG in device mem
|
||||
headerBuf *cl.MemObject // Hash of block-to-mine in device mem
|
||||
searchBuffers []*cl.MemObject
|
||||
|
||||
searchKernel *cl.Kernel
|
||||
hashKernel *cl.Kernel
|
||||
|
||||
queue *cl.CommandQueue
|
||||
ctx *cl.Context
|
||||
workGroupSize int
|
||||
|
||||
nonceRand *mrand.Rand // seeded by crypto/rand, see comments where it's initialised
|
||||
result common.Hash
|
||||
}
|
||||
|
||||
type OpenCLMiner struct {
|
||||
mu sync.Mutex
|
||||
|
||||
ethash *Ethash // Ethash full DAG & cache in host mem
|
||||
|
||||
deviceIds []int
|
||||
devices []*OpenCLDevice
|
||||
|
||||
dagSize uint64
|
||||
|
||||
hashRate int32 // Go atomics & uint64 have some issues; int32 is supported on all platforms
|
||||
}
|
||||
|
||||
// pendingSearch pairs a buffer index with the start nonce of a search.
type pendingSearch struct {
	bufIndex   uint32
	startNonce uint64
}
|
||||
|
||||
// Tunables and fixed parameters of the OpenCL miner.
const (
	SIZEOF_UINT32 = 4

	// Ethash parameters, see [1].
	ethashMixBytesLen = 128
	ethashAccesses    = 64

	// Kernel launch parameters, see [4].
	workGroupSize    = 32 // must be multiple of 8
	maxSearchResults = 63
	searchBufSize    = 2
	globalWorkSize   = 1024 * 256
)
|
||||
|
||||
func NewCL(deviceIds []int) *OpenCLMiner {
|
||||
ids := make([]int, len(deviceIds))
|
||||
copy(ids, deviceIds)
|
||||
return &OpenCLMiner{
|
||||
ethash: New(),
|
||||
dagSize: 0, // to see if we need to update DAG.
|
||||
deviceIds: ids,
|
||||
}
|
||||
}
|
||||
|
||||
func PrintDevices() {
|
||||
fmt.Println("=============================================")
|
||||
fmt.Println("============ OpenCL Device Info =============")
|
||||
fmt.Println("=============================================")
|
||||
|
||||
var found []*cl.Device
|
||||
|
||||
platforms, err := cl.GetPlatforms()
|
||||
if err != nil {
|
||||
fmt.Println("Plaform error (check your OpenCL installation): %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
for i, p := range platforms {
|
||||
fmt.Println("Platform id ", i)
|
||||
fmt.Println("Platform Name ", p.Name())
|
||||
fmt.Println("Platform Vendor ", p.Vendor())
|
||||
fmt.Println("Platform Version ", p.Version())
|
||||
fmt.Println("Platform Extensions ", p.Extensions())
|
||||
fmt.Println("Platform Profile ", p.Profile())
|
||||
fmt.Println("")
|
||||
|
||||
devices, err := cl.GetDevices(p, cl.DeviceTypeGPU)
|
||||
if err != nil {
|
||||
fmt.Println("Device error (check your GPU drivers) :", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, d := range devices {
|
||||
fmt.Println("Device OpenCL id ", i)
|
||||
fmt.Println("Device id for mining ", len(found))
|
||||
fmt.Println("Device Name ", d.Name())
|
||||
fmt.Println("Vendor ", d.Vendor())
|
||||
fmt.Println("Version ", d.Version())
|
||||
fmt.Println("Driver version ", d.DriverVersion())
|
||||
fmt.Println("Address bits ", d.AddressBits())
|
||||
fmt.Println("Max clock freq ", d.MaxClockFrequency())
|
||||
fmt.Println("Global mem size ", d.GlobalMemSize())
|
||||
fmt.Println("Max constant buffer size", d.MaxConstantBufferSize())
|
||||
fmt.Println("Max mem alloc size ", d.MaxMemAllocSize())
|
||||
fmt.Println("Max compute units ", d.MaxComputeUnits())
|
||||
fmt.Println("Max work group size ", d.MaxWorkGroupSize())
|
||||
fmt.Println("Max work item sizes ", d.MaxWorkItemSizes())
|
||||
fmt.Println("=============================================")
|
||||
|
||||
found = append(found, d)
|
||||
}
|
||||
}
|
||||
if len(found) == 0 {
|
||||
fmt.Println("Found no GPU(s). Check that your OS can see the GPU(s)")
|
||||
} else {
|
||||
var idsFormat string
|
||||
for i := 0; i < len(found); i++ {
|
||||
idsFormat += strconv.Itoa(i)
|
||||
if i != len(found)-1 {
|
||||
idsFormat += ","
|
||||
}
|
||||
}
|
||||
fmt.Printf("Found %v devices. Benchmark first GPU: geth gpubench 0\n", len(found))
|
||||
fmt.Printf("Mine using all GPUs: geth --minegpu %v\n", idsFormat)
|
||||
}
|
||||
}
|
||||
|
||||
// See [2]. We basically do the same here, but the Go OpenCL bindings
|
||||
// are at a slightly higher abtraction level.
|
||||
func InitCL(blockNum uint64, c *OpenCLMiner) error {
|
||||
platforms, err := cl.GetPlatforms()
|
||||
if err != nil {
|
||||
return fmt.Errorf("Plaform error: %v\nCheck your OpenCL installation and then run geth gpuinfo", err)
|
||||
}
|
||||
|
||||
var devices []*cl.Device
|
||||
for _, p := range platforms {
|
||||
ds, err := cl.GetDevices(p, cl.DeviceTypeGPU)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Devices error: %v\nCheck your GPU drivers and then run geth gpuinfo", err)
|
||||
}
|
||||
for _, d := range ds {
|
||||
devices = append(devices, d)
|
||||
}
|
||||
}
|
||||
|
||||
pow := New()
|
||||
_ = pow.getDAG(blockNum) // generates DAG if we don't have it
|
||||
pow.Light.getCache(blockNum) // and cache
|
||||
|
||||
c.ethash = pow
|
||||
dagSize := uint64(C.ethash_get_datasize(C.uint64_t(blockNum)))
|
||||
c.dagSize = dagSize
|
||||
|
||||
for _, id := range c.deviceIds {
|
||||
if id > len(devices)-1 {
|
||||
return fmt.Errorf("Device id not found. See available device ids with: geth gpuinfo")
|
||||
} else {
|
||||
err := initCLDevice(id, devices[id], c)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(c.devices) == 0 {
|
||||
return fmt.Errorf("No GPU devices found")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func initCLDevice(deviceId int, device *cl.Device, c *OpenCLMiner) error {
|
||||
devMaxAlloc := uint64(device.MaxMemAllocSize())
|
||||
devGlobalMem := uint64(device.GlobalMemSize())
|
||||
|
||||
// TODO: more fine grained version logic
|
||||
if device.Version() == "OpenCL 1.0" {
|
||||
fmt.Println("Device OpenCL version not supported: ", device.Version())
|
||||
return fmt.Errorf("opencl version not supported")
|
||||
}
|
||||
|
||||
var cl11, cl12 bool
|
||||
if device.Version() == "OpenCL 1.1" {
|
||||
cl11 = true
|
||||
}
|
||||
if device.Version() == "OpenCL 1.2" {
|
||||
cl12 = true
|
||||
}
|
||||
|
||||
// log warnings but carry on; some device drivers report inaccurate values
|
||||
if c.dagSize > devGlobalMem {
|
||||
fmt.Printf("WARNING: device memory may be insufficient: %v. DAG size: %v.\n", devGlobalMem, c.dagSize)
|
||||
}
|
||||
|
||||
if c.dagSize > devMaxAlloc {
|
||||
fmt.Printf("WARNING: DAG size (%v) larger than device max memory allocation size (%v).\n", c.dagSize, devMaxAlloc)
|
||||
fmt.Printf("You probably have to export GPU_MAX_ALLOC_PERCENT=95\n")
|
||||
}
|
||||
|
||||
fmt.Printf("Initialising device %v: %v\n", deviceId, device.Name())
|
||||
|
||||
context, err := cl.CreateContext([]*cl.Device{device})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed creating context:", err)
|
||||
}
|
||||
|
||||
// TODO: test running with CL_QUEUE_PROFILING_ENABLE for profiling?
|
||||
queue, err := context.CreateCommandQueue(device, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("command queue err:", err)
|
||||
}
|
||||
|
||||
// See [4] section 3.2 and [3] "clBuildProgram".
|
||||
// The OpenCL kernel code is compiled at run-time.
|
||||
kvs := make(map[string]string, 4)
|
||||
kvs["GROUP_SIZE"] = strconv.FormatUint(workGroupSize, 10)
|
||||
kvs["DAG_SIZE"] = strconv.FormatUint(c.dagSize/ethashMixBytesLen, 10)
|
||||
kvs["ACCESSES"] = strconv.FormatUint(ethashAccesses, 10)
|
||||
kvs["MAX_OUTPUTS"] = strconv.FormatUint(maxSearchResults, 10)
|
||||
kernelCode := replaceWords(kernel, kvs)
|
||||
|
||||
program, err := context.CreateProgramWithSource([]string{kernelCode})
|
||||
if err != nil {
|
||||
return fmt.Errorf("program err:", err)
|
||||
}
|
||||
|
||||
/* if using AMD OpenCL impl, you can set this to debug on x86 CPU device.
|
||||
see AMD OpenCL programming guide section 4.2
|
||||
|
||||
export in shell before running:
|
||||
export AMD_OCL_BUILD_OPTIONS_APPEND="-g -O0"
|
||||
export CPU_MAX_COMPUTE_UNITS=1
|
||||
|
||||
buildOpts := "-g -cl-opt-disable"
|
||||
|
||||
*/
|
||||
buildOpts := ""
|
||||
err = program.BuildProgram([]*cl.Device{device}, buildOpts)
|
||||
if err != nil {
|
||||
return fmt.Errorf("program build err:", err)
|
||||
}
|
||||
|
||||
var searchKernelName, hashKernelName string
|
||||
searchKernelName = "ethash_search"
|
||||
hashKernelName = "ethash_hash"
|
||||
|
||||
searchKernel, err := program.CreateKernel(searchKernelName)
|
||||
hashKernel, err := program.CreateKernel(hashKernelName)
|
||||
if err != nil {
|
||||
return fmt.Errorf("kernel err:", err)
|
||||
}
|
||||
|
||||
// TODO: when this DAG size appears, patch the Go bindings
|
||||
// (context.go) to work with uint64 as size_t
|
||||
if c.dagSize > math.MaxInt32 {
|
||||
fmt.Println("DAG too large for allocation.")
|
||||
return fmt.Errorf("DAG too large for alloc")
|
||||
}
|
||||
|
||||
// TODO: patch up Go bindings to work with size_t, will overflow if > maxint32
|
||||
// TODO: fuck. shit's gonna overflow around 2017-06-09 12:17:02
|
||||
dagBuf := *(new(*cl.MemObject))
|
||||
dagBuf, err = context.CreateEmptyBuffer(cl.MemReadOnly, int(c.dagSize))
|
||||
if err != nil {
|
||||
return fmt.Errorf("allocating dag buf failed: ", err)
|
||||
}
|
||||
|
||||
// write DAG to device mem
|
||||
dagPtr := unsafe.Pointer(c.ethash.Full.current.ptr.data)
|
||||
_, err = queue.EnqueueWriteBuffer(dagBuf, true, 0, int(c.dagSize), dagPtr, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("writing to dag buf failed: ", err)
|
||||
}
|
||||
|
||||
searchBuffers := make([]*cl.MemObject, searchBufSize)
|
||||
for i := 0; i < searchBufSize; i++ {
|
||||
searchBuff, err := context.CreateEmptyBuffer(cl.MemWriteOnly, (1+maxSearchResults)*SIZEOF_UINT32)
|
||||
if err != nil {
|
||||
return fmt.Errorf("search buffer err:", err)
|
||||
}
|
||||
searchBuffers[i] = searchBuff
|
||||
}
|
||||
|
||||
headerBuf, err := context.CreateEmptyBuffer(cl.MemReadOnly, 32)
|
||||
if err != nil {
|
||||
return fmt.Errorf("header buffer err:", err)
|
||||
}
|
||||
|
||||
// Unique, random nonces are crucial for mining efficieny.
|
||||
// While we do not need cryptographically secure PRNG for nonces,
|
||||
// we want to have uniform distribution and minimal repetition of nonces.
|
||||
// We could guarantee strict uniqueness of nonces by generating unique ranges,
|
||||
// but a int64 seed from crypto/rand should be good enough.
|
||||
// we then use math/rand for speed and to avoid draining OS entropy pool
|
||||
seed, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
nonceRand := mrand.New(mrand.NewSource(seed.Int64()))
|
||||
|
||||
deviceStruct := &OpenCLDevice{
|
||||
deviceId: deviceId,
|
||||
device: device,
|
||||
openCL11: cl11,
|
||||
openCL12: cl12,
|
||||
|
||||
dagBuf: dagBuf,
|
||||
headerBuf: headerBuf,
|
||||
searchBuffers: searchBuffers,
|
||||
|
||||
searchKernel: searchKernel,
|
||||
hashKernel: hashKernel,
|
||||
|
||||
queue: queue,
|
||||
ctx: context,
|
||||
|
||||
workGroupSize: workGroupSize,
|
||||
|
||||
nonceRand: nonceRand,
|
||||
}
|
||||
c.devices = append(c.devices, deviceStruct)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *OpenCLMiner) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) {
|
||||
c.mu.Lock()
|
||||
newDagSize := uint64(C.ethash_get_datasize(C.uint64_t(block.NumberU64())))
|
||||
if newDagSize > c.dagSize {
|
||||
// TODO: clean up buffers from previous DAG?
|
||||
err := InitCL(block.NumberU64(), c)
|
||||
if err != nil {
|
||||
fmt.Println("OpenCL init error: ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
}
|
||||
defer c.mu.Unlock()
|
||||
|
||||
// Avoid unneeded OpenCL initialisation if we received stop while running InitCL
|
||||
select {
|
||||
case <-stop:
|
||||
return 0, []byte{0}
|
||||
default:
|
||||
}
|
||||
|
||||
headerHash := block.HashNoNonce()
|
||||
diff := block.Difficulty()
|
||||
target256 := new(big.Int).Div(maxUint256, diff)
|
||||
target64 := new(big.Int).Rsh(target256, 192).Uint64()
|
||||
var zero uint32 = 0
|
||||
|
||||
d := c.devices[index]
|
||||
|
||||
_, err := d.queue.EnqueueWriteBuffer(d.headerBuf, false, 0, 32, unsafe.Pointer(&headerHash[0]), nil)
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search clEnqueueWriterBuffer : ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
|
||||
for i := 0; i < searchBufSize; i++ {
|
||||
_, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[i], false, 0, 4, unsafe.Pointer(&zero), nil)
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search clEnqueueWriterBuffer : ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
}
|
||||
|
||||
// wait for all search buffers to complete
|
||||
err = d.queue.Finish()
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search clFinish : ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
|
||||
err = d.searchKernel.SetArg(1, d.headerBuf)
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search clSetKernelArg : ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
|
||||
err = d.searchKernel.SetArg(2, d.dagBuf)
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search clSetKernelArg : ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
|
||||
err = d.searchKernel.SetArg(4, target64)
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search clSetKernelArg : ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
err = d.searchKernel.SetArg(5, uint32(math.MaxUint32))
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search clSetKernelArg : ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
|
||||
// wait on this before returning
|
||||
var preReturnEvent *cl.Event
|
||||
if d.openCL12 {
|
||||
preReturnEvent, err = d.ctx.CreateUserEvent()
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search create CL user event : ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
}
|
||||
|
||||
pending := make([]pendingSearch, 0, searchBufSize)
|
||||
var p *pendingSearch
|
||||
searchBufIndex := uint32(0)
|
||||
var checkNonce uint64
|
||||
loops := int64(0)
|
||||
prevHashRate := int32(0)
|
||||
start := time.Now().UnixNano()
|
||||
// we grab a single random nonce and sets this as argument to the kernel search function
|
||||
// the device will then add each local threads gid to the nonce, creating a unique nonce
|
||||
// for each device computing unit executing in parallel
|
||||
initNonce := uint64(d.nonceRand.Int63())
|
||||
for nonce := initNonce; ; nonce += uint64(globalWorkSize) {
|
||||
select {
|
||||
case <-stop:
|
||||
|
||||
/*
|
||||
if d.openCL12 {
|
||||
err = cl.WaitForEvents([]*cl.Event{preReturnEvent})
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search WaitForEvents: ", err)
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
atomic.AddInt32(&c.hashRate, -prevHashRate)
|
||||
return 0, []byte{0}
|
||||
default:
|
||||
}
|
||||
|
||||
if (loops % (1 << 7)) == 0 {
|
||||
elapsed := time.Now().UnixNano() - start
|
||||
// TODO: verify if this is correct hash rate calculation
|
||||
hashes := (float64(1e9) / float64(elapsed)) * float64(loops*1024*256)
|
||||
hashrateDiff := int32(hashes) - prevHashRate
|
||||
prevHashRate = int32(hashes)
|
||||
atomic.AddInt32(&c.hashRate, hashrateDiff)
|
||||
}
|
||||
loops++
|
||||
|
||||
err = d.searchKernel.SetArg(0, d.searchBuffers[searchBufIndex])
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search clSetKernelArg : ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
err = d.searchKernel.SetArg(3, nonce)
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search clSetKernelArg : ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
|
||||
// execute kernel
|
||||
_, err := d.queue.EnqueueNDRangeKernel(
|
||||
d.searchKernel,
|
||||
[]int{0},
|
||||
[]int{globalWorkSize},
|
||||
[]int{d.workGroupSize},
|
||||
nil)
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search clEnqueueNDRangeKernel : ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
|
||||
pending = append(pending, pendingSearch{bufIndex: searchBufIndex, startNonce: nonce})
|
||||
searchBufIndex = (searchBufIndex + 1) % searchBufSize
|
||||
|
||||
if len(pending) == searchBufSize {
|
||||
p = &(pending[searchBufIndex])
|
||||
cres, _, err := d.queue.EnqueueMapBuffer(d.searchBuffers[p.bufIndex], true,
|
||||
cl.MapFlagRead, 0, (1+maxSearchResults)*SIZEOF_UINT32,
|
||||
nil)
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search clEnqueueMapBuffer: ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
|
||||
results := cres.ByteSlice()
|
||||
nfound := binary.LittleEndian.Uint32(results)
|
||||
nfound = uint32(math.Min(float64(nfound), float64(maxSearchResults)))
|
||||
// OpenCL returns the offsets from the start nonce
|
||||
for i := uint32(0); i < nfound; i++ {
|
||||
lo := (i + 1) * SIZEOF_UINT32
|
||||
hi := (i + 2) * SIZEOF_UINT32
|
||||
upperNonce := uint64(binary.LittleEndian.Uint32(results[lo:hi]))
|
||||
checkNonce = p.startNonce + upperNonce
|
||||
if checkNonce != 0 {
|
||||
cn := C.uint64_t(checkNonce)
|
||||
ds := C.uint64_t(c.dagSize)
|
||||
// We verify that the nonce is indeed a solution by
|
||||
// executing the Ethash verification function (on the CPU).
|
||||
ret := C.ethash_light_compute_internal(c.ethash.Light.current.ptr, ds, hashToH256(headerHash), cn)
|
||||
// TODO: return result first
|
||||
if ret.success && h256ToHash(ret.result).Big().Cmp(target256) <= 0 {
|
||||
_, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil)
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search clEnqueueUnmapMemObject: ", err)
|
||||
}
|
||||
if d.openCL12 {
|
||||
err = cl.WaitForEvents([]*cl.Event{preReturnEvent})
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search WaitForEvents: ", err)
|
||||
}
|
||||
}
|
||||
return checkNonce, C.GoBytes(unsafe.Pointer(&ret.mix_hash), C.int(32))
|
||||
}
|
||||
|
||||
_, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[p.bufIndex], false, 0, 4, unsafe.Pointer(&zero), nil)
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search cl: EnqueueWriteBuffer", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
}
|
||||
}
|
||||
_, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil)
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search clEnqueueUnMapMemObject: ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
pending = append(pending[:searchBufIndex], pending[searchBufIndex+1:]...)
|
||||
}
|
||||
}
|
||||
if d.openCL12 {
|
||||
err := cl.WaitForEvents([]*cl.Event{preReturnEvent})
|
||||
if err != nil {
|
||||
fmt.Println("Error in Search clWaitForEvents: ", err)
|
||||
return 0, []byte{0}
|
||||
}
|
||||
}
|
||||
return 0, []byte{0}
|
||||
}
|
||||
|
||||
func (c *OpenCLMiner) Verify(block pow.Block) bool {
|
||||
return c.ethash.Light.Verify(block)
|
||||
}
|
||||
func (c *OpenCLMiner) GetHashrate() int64 {
|
||||
return int64(atomic.LoadInt32(&c.hashRate))
|
||||
}
|
||||
func (c *OpenCLMiner) Turbo(on bool) {
|
||||
// This is GPU mining. Always be turbo.
|
||||
}
|
||||
|
||||
// replaceWords returns text with every occurrence of each key of kvs
// replaced by the corresponding value.
//
// The replacements are applied one key after another in map iteration
// order, each on the result of the previous one. Keys and values should
// therefore not overlap with each other, otherwise the result depends on
// that order.
func replaceWords(text string, kvs map[string]string) string {
	replaced := text
	for word, substitute := range kvs {
		replaced = strings.Replace(replaced, word, substitute, -1)
	}
	return replaced
}
|
||||
|
||||
// logErr prints err to standard output with an "Error in OpenCL call:"
// prefix. A nil err prints nothing.
func logErr(err error) {
	if err == nil {
		return
	}
	fmt.Println("Error in OpenCL call:", err)
}
|
||||
|
||||
// argErr returns a new error whose message is "arg err: " followed by err
// formatted with %v.
func argErr(err error) error {
	msg := fmt.Sprintf("arg err: %v", err)
	return fmt.Errorf("%s", msg)
}
|
600
Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl_kernel_go_str.go
generated
vendored
Normal file
600
Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl_kernel_go_str.go
generated
vendored
Normal file
|
@ -0,0 +1,600 @@
|
|||
package ethash
|
||||
|
||||
/* DO NOT EDIT!!!
|
||||
|
||||
This code is version controlled at
|
||||
https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner_kernel.cl
|
||||
|
||||
If needed change it there first, then copy over here.
|
||||
*/
|
||||
|
||||
const kernel = `
|
||||
// author Tim Hughes <tim@twistedfury.com>
|
||||
// Tested on Radeon HD 7850
|
||||
// Hashrate: 15940347 hashes/s
|
||||
// Bandwidth: 124533 MB/s
|
||||
// search kernel should fit in <= 84 VGPRS (3 wavefronts)
|
||||
|
||||
#define THREADS_PER_HASH (128 / 16)
|
||||
#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH)
|
||||
|
||||
#define FNV_PRIME 0x01000193
|
||||
|
||||
__constant uint2 const Keccak_f1600_RC[24] = {
|
||||
(uint2)(0x00000001, 0x00000000),
|
||||
(uint2)(0x00008082, 0x00000000),
|
||||
(uint2)(0x0000808a, 0x80000000),
|
||||
(uint2)(0x80008000, 0x80000000),
|
||||
(uint2)(0x0000808b, 0x00000000),
|
||||
(uint2)(0x80000001, 0x00000000),
|
||||
(uint2)(0x80008081, 0x80000000),
|
||||
(uint2)(0x00008009, 0x80000000),
|
||||
(uint2)(0x0000008a, 0x00000000),
|
||||
(uint2)(0x00000088, 0x00000000),
|
||||
(uint2)(0x80008009, 0x00000000),
|
||||
(uint2)(0x8000000a, 0x00000000),
|
||||
(uint2)(0x8000808b, 0x00000000),
|
||||
(uint2)(0x0000008b, 0x80000000),
|
||||
(uint2)(0x00008089, 0x80000000),
|
||||
(uint2)(0x00008003, 0x80000000),
|
||||
(uint2)(0x00008002, 0x80000000),
|
||||
(uint2)(0x00000080, 0x80000000),
|
||||
(uint2)(0x0000800a, 0x00000000),
|
||||
(uint2)(0x8000000a, 0x80000000),
|
||||
(uint2)(0x80008081, 0x80000000),
|
||||
(uint2)(0x00008080, 0x80000000),
|
||||
(uint2)(0x80000001, 0x00000000),
|
||||
(uint2)(0x80008008, 0x80000000),
|
||||
};
|
||||
|
||||
void keccak_f1600_round(uint2* a, uint r, uint out_size)
|
||||
{
|
||||
#if !__ENDIAN_LITTLE__
|
||||
for (uint i = 0; i != 25; ++i)
|
||||
a[i] = a[i].yx;
|
||||
#endif
|
||||
|
||||
uint2 b[25];
|
||||
uint2 t;
|
||||
|
||||
// Theta
|
||||
b[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20];
|
||||
b[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21];
|
||||
b[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22];
|
||||
b[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23];
|
||||
b[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24];
|
||||
t = b[4] ^ (uint2)(b[1].x << 1 | b[1].y >> 31, b[1].y << 1 | b[1].x >> 31);
|
||||
a[0] ^= t;
|
||||
a[5] ^= t;
|
||||
a[10] ^= t;
|
||||
a[15] ^= t;
|
||||
a[20] ^= t;
|
||||
t = b[0] ^ (uint2)(b[2].x << 1 | b[2].y >> 31, b[2].y << 1 | b[2].x >> 31);
|
||||
a[1] ^= t;
|
||||
a[6] ^= t;
|
||||
a[11] ^= t;
|
||||
a[16] ^= t;
|
||||
a[21] ^= t;
|
||||
t = b[1] ^ (uint2)(b[3].x << 1 | b[3].y >> 31, b[3].y << 1 | b[3].x >> 31);
|
||||
a[2] ^= t;
|
||||
a[7] ^= t;
|
||||
a[12] ^= t;
|
||||
a[17] ^= t;
|
||||
a[22] ^= t;
|
||||
t = b[2] ^ (uint2)(b[4].x << 1 | b[4].y >> 31, b[4].y << 1 | b[4].x >> 31);
|
||||
a[3] ^= t;
|
||||
a[8] ^= t;
|
||||
a[13] ^= t;
|
||||
a[18] ^= t;
|
||||
a[23] ^= t;
|
||||
t = b[3] ^ (uint2)(b[0].x << 1 | b[0].y >> 31, b[0].y << 1 | b[0].x >> 31);
|
||||
a[4] ^= t;
|
||||
a[9] ^= t;
|
||||
a[14] ^= t;
|
||||
a[19] ^= t;
|
||||
a[24] ^= t;
|
||||
|
||||
// Rho Pi
|
||||
b[0] = a[0];
|
||||
b[10] = (uint2)(a[1].x << 1 | a[1].y >> 31, a[1].y << 1 | a[1].x >> 31);
|
||||
b[7] = (uint2)(a[10].x << 3 | a[10].y >> 29, a[10].y << 3 | a[10].x >> 29);
|
||||
b[11] = (uint2)(a[7].x << 6 | a[7].y >> 26, a[7].y << 6 | a[7].x >> 26);
|
||||
b[17] = (uint2)(a[11].x << 10 | a[11].y >> 22, a[11].y << 10 | a[11].x >> 22);
|
||||
b[18] = (uint2)(a[17].x << 15 | a[17].y >> 17, a[17].y << 15 | a[17].x >> 17);
|
||||
b[3] = (uint2)(a[18].x << 21 | a[18].y >> 11, a[18].y << 21 | a[18].x >> 11);
|
||||
b[5] = (uint2)(a[3].x << 28 | a[3].y >> 4, a[3].y << 28 | a[3].x >> 4);
|
||||
b[16] = (uint2)(a[5].y << 4 | a[5].x >> 28, a[5].x << 4 | a[5].y >> 28);
|
||||
b[8] = (uint2)(a[16].y << 13 | a[16].x >> 19, a[16].x << 13 | a[16].y >> 19);
|
||||
b[21] = (uint2)(a[8].y << 23 | a[8].x >> 9, a[8].x << 23 | a[8].y >> 9);
|
||||
b[24] = (uint2)(a[21].x << 2 | a[21].y >> 30, a[21].y << 2 | a[21].x >> 30);
|
||||
b[4] = (uint2)(a[24].x << 14 | a[24].y >> 18, a[24].y << 14 | a[24].x >> 18);
|
||||
b[15] = (uint2)(a[4].x << 27 | a[4].y >> 5, a[4].y << 27 | a[4].x >> 5);
|
||||
b[23] = (uint2)(a[15].y << 9 | a[15].x >> 23, a[15].x << 9 | a[15].y >> 23);
|
||||
b[19] = (uint2)(a[23].y << 24 | a[23].x >> 8, a[23].x << 24 | a[23].y >> 8);
|
||||
b[13] = (uint2)(a[19].x << 8 | a[19].y >> 24, a[19].y << 8 | a[19].x >> 24);
|
||||
b[12] = (uint2)(a[13].x << 25 | a[13].y >> 7, a[13].y << 25 | a[13].x >> 7);
|
||||
b[2] = (uint2)(a[12].y << 11 | a[12].x >> 21, a[12].x << 11 | a[12].y >> 21);
|
||||
b[20] = (uint2)(a[2].y << 30 | a[2].x >> 2, a[2].x << 30 | a[2].y >> 2);
|
||||
b[14] = (uint2)(a[20].x << 18 | a[20].y >> 14, a[20].y << 18 | a[20].x >> 14);
|
||||
b[22] = (uint2)(a[14].y << 7 | a[14].x >> 25, a[14].x << 7 | a[14].y >> 25);
|
||||
b[9] = (uint2)(a[22].y << 29 | a[22].x >> 3, a[22].x << 29 | a[22].y >> 3);
|
||||
b[6] = (uint2)(a[9].x << 20 | a[9].y >> 12, a[9].y << 20 | a[9].x >> 12);
|
||||
b[1] = (uint2)(a[6].y << 12 | a[6].x >> 20, a[6].x << 12 | a[6].y >> 20);
|
||||
|
||||
// Chi
|
||||
a[0] = bitselect(b[0] ^ b[2], b[0], b[1]);
|
||||
a[1] = bitselect(b[1] ^ b[3], b[1], b[2]);
|
||||
a[2] = bitselect(b[2] ^ b[4], b[2], b[3]);
|
||||
a[3] = bitselect(b[3] ^ b[0], b[3], b[4]);
|
||||
if (out_size >= 4)
|
||||
{
|
||||
a[4] = bitselect(b[4] ^ b[1], b[4], b[0]);
|
||||
a[5] = bitselect(b[5] ^ b[7], b[5], b[6]);
|
||||
a[6] = bitselect(b[6] ^ b[8], b[6], b[7]);
|
||||
a[7] = bitselect(b[7] ^ b[9], b[7], b[8]);
|
||||
a[8] = bitselect(b[8] ^ b[5], b[8], b[9]);
|
||||
if (out_size >= 8)
|
||||
{
|
||||
a[9] = bitselect(b[9] ^ b[6], b[9], b[5]);
|
||||
a[10] = bitselect(b[10] ^ b[12], b[10], b[11]);
|
||||
a[11] = bitselect(b[11] ^ b[13], b[11], b[12]);
|
||||
a[12] = bitselect(b[12] ^ b[14], b[12], b[13]);
|
||||
a[13] = bitselect(b[13] ^ b[10], b[13], b[14]);
|
||||
a[14] = bitselect(b[14] ^ b[11], b[14], b[10]);
|
||||
a[15] = bitselect(b[15] ^ b[17], b[15], b[16]);
|
||||
a[16] = bitselect(b[16] ^ b[18], b[16], b[17]);
|
||||
a[17] = bitselect(b[17] ^ b[19], b[17], b[18]);
|
||||
a[18] = bitselect(b[18] ^ b[15], b[18], b[19]);
|
||||
a[19] = bitselect(b[19] ^ b[16], b[19], b[15]);
|
||||
a[20] = bitselect(b[20] ^ b[22], b[20], b[21]);
|
||||
a[21] = bitselect(b[21] ^ b[23], b[21], b[22]);
|
||||
a[22] = bitselect(b[22] ^ b[24], b[22], b[23]);
|
||||
a[23] = bitselect(b[23] ^ b[20], b[23], b[24]);
|
||||
a[24] = bitselect(b[24] ^ b[21], b[24], b[20]);
|
||||
}
|
||||
}
|
||||
|
||||
// Iota
|
||||
a[0] ^= Keccak_f1600_RC[r];
|
||||
|
||||
#if !__ENDIAN_LITTLE__
|
||||
for (uint i = 0; i != 25; ++i)
|
||||
a[i] = a[i].yx;
|
||||
#endif
|
||||
}
|
||||
|
||||
void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate)
|
||||
{
|
||||
for (uint i = in_size; i != 25; ++i)
|
||||
{
|
||||
a[i] = 0;
|
||||
}
|
||||
#if __ENDIAN_LITTLE__
|
||||
a[in_size] ^= 0x0000000000000001;
|
||||
a[24-out_size*2] ^= 0x8000000000000000;
|
||||
#else
|
||||
a[in_size] ^= 0x0100000000000000;
|
||||
a[24-out_size*2] ^= 0x0000000000000080;
|
||||
#endif
|
||||
|
||||
// Originally I unrolled the first and last rounds to interface
|
||||
// better with surrounding code, however I haven't done this
|
||||
// without causing the AMD compiler to blow up the VGPR usage.
|
||||
uint r = 0;
|
||||
do
|
||||
{
|
||||
// This dynamic branch stops the AMD compiler unrolling the loop
|
||||
// and additionally saves about 33% of the VGPRs, enough to gain another
|
||||
// wavefront. Ideally we'd get 4 in flight, but 3 is the best I can
|
||||
// massage out of the compiler. It doesn't really seem to matter how
|
||||
// much we try and help the compiler save VGPRs because it seems to throw
|
||||
// that information away, hence the implementation of keccak here
|
||||
// doesn't bother.
|
||||
if (isolate)
|
||||
{
|
||||
keccak_f1600_round((uint2*)a, r++, 25);
|
||||
}
|
||||
}
|
||||
while (r < 23);
|
||||
|
||||
// final round optimised for digest size
|
||||
keccak_f1600_round((uint2*)a, r++, out_size);
|
||||
}
|
||||
|
||||
#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; }
|
||||
|
||||
#define countof(x) (sizeof(x) / sizeof(x[0]))
|
||||
|
||||
uint fnv(uint x, uint y)
|
||||
{
|
||||
return x * FNV_PRIME ^ y;
|
||||
}
|
||||
|
||||
uint4 fnv4(uint4 x, uint4 y)
|
||||
{
|
||||
return x * FNV_PRIME ^ y;
|
||||
}
|
||||
|
||||
uint fnv_reduce(uint4 v)
|
||||
{
|
||||
return fnv(fnv(fnv(v.x, v.y), v.z), v.w);
|
||||
}
|
||||
|
||||
typedef union
|
||||
{
|
||||
ulong ulongs[32 / sizeof(ulong)];
|
||||
uint uints[32 / sizeof(uint)];
|
||||
} hash32_t;
|
||||
|
||||
typedef union
|
||||
{
|
||||
ulong ulongs[64 / sizeof(ulong)];
|
||||
uint4 uint4s[64 / sizeof(uint4)];
|
||||
} hash64_t;
|
||||
|
||||
typedef union
|
||||
{
|
||||
uint uints[128 / sizeof(uint)];
|
||||
uint4 uint4s[128 / sizeof(uint4)];
|
||||
} hash128_t;
|
||||
|
||||
hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate)
|
||||
{
|
||||
hash64_t init;
|
||||
uint const init_size = countof(init.ulongs);
|
||||
uint const hash_size = countof(header->ulongs);
|
||||
|
||||
// sha3_512(header .. nonce)
|
||||
ulong state[25];
|
||||
copy(state, header->ulongs, hash_size);
|
||||
state[hash_size] = nonce;
|
||||
keccak_f1600_no_absorb(state, hash_size + 1, init_size, isolate);
|
||||
|
||||
copy(init.ulongs, state, init_size);
|
||||
return init;
|
||||
}
|
||||
|
||||
uint inner_loop_chunks(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, __global hash128_t const* g_dag1, __global hash128_t const* g_dag2, __global hash128_t const* g_dag3, uint isolate)
|
||||
{
|
||||
uint4 mix = init;
|
||||
|
||||
// share init0
|
||||
if (thread_id == 0)
|
||||
*share = mix.x;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
uint init0 = *share;
|
||||
|
||||
uint a = 0;
|
||||
do
|
||||
{
|
||||
bool update_share = thread_id == (a/4) % THREADS_PER_HASH;
|
||||
|
||||
#pragma unroll
|
||||
for (uint i = 0; i != 4; ++i)
|
||||
{
|
||||
if (update_share)
|
||||
{
|
||||
uint m[4] = { mix.x, mix.y, mix.z, mix.w };
|
||||
*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
mix = fnv4(mix, *share>=3 * DAG_SIZE / 4 ? g_dag3[*share - 3 * DAG_SIZE / 4].uint4s[thread_id] : *share>=DAG_SIZE / 2 ? g_dag2[*share - DAG_SIZE / 2].uint4s[thread_id] : *share>=DAG_SIZE / 4 ? g_dag1[*share - DAG_SIZE / 4].uint4s[thread_id]:g_dag[*share].uint4s[thread_id]);
|
||||
}
|
||||
} while ((a += 4) != (ACCESSES & isolate));
|
||||
|
||||
return fnv_reduce(mix);
|
||||
}
|
||||
|
||||
|
||||
|
||||
uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate)
|
||||
{
|
||||
uint4 mix = init;
|
||||
|
||||
// share init0
|
||||
if (thread_id == 0)
|
||||
*share = mix.x;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
uint init0 = *share;
|
||||
|
||||
uint a = 0;
|
||||
do
|
||||
{
|
||||
bool update_share = thread_id == (a/4) % THREADS_PER_HASH;
|
||||
|
||||
#pragma unroll
|
||||
for (uint i = 0; i != 4; ++i)
|
||||
{
|
||||
if (update_share)
|
||||
{
|
||||
uint m[4] = { mix.x, mix.y, mix.z, mix.w };
|
||||
*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
mix = fnv4(mix, g_dag[*share].uint4s[thread_id]);
|
||||
}
|
||||
}
|
||||
while ((a += 4) != (ACCESSES & isolate));
|
||||
|
||||
return fnv_reduce(mix);
|
||||
}
|
||||
|
||||
|
||||
hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate)
|
||||
{
|
||||
ulong state[25];
|
||||
|
||||
hash32_t hash;
|
||||
uint const hash_size = countof(hash.ulongs);
|
||||
uint const init_size = countof(init->ulongs);
|
||||
uint const mix_size = countof(mix->ulongs);
|
||||
|
||||
// keccak_256(keccak_512(header..nonce) .. mix);
|
||||
copy(state, init->ulongs, init_size);
|
||||
copy(state + init_size, mix->ulongs, mix_size);
|
||||
keccak_f1600_no_absorb(state, init_size+mix_size, hash_size, isolate);
|
||||
|
||||
// copy out
|
||||
copy(hash.ulongs, state, hash_size);
|
||||
return hash;
|
||||
}
|
||||
|
||||
hash32_t compute_hash_simple(
|
||||
__constant hash32_t const* g_header,
|
||||
__global hash128_t const* g_dag,
|
||||
ulong nonce,
|
||||
uint isolate
|
||||
)
|
||||
{
|
||||
hash64_t init = init_hash(g_header, nonce, isolate);
|
||||
|
||||
hash128_t mix;
|
||||
for (uint i = 0; i != countof(mix.uint4s); ++i)
|
||||
{
|
||||
mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)];
|
||||
}
|
||||
|
||||
uint mix_val = mix.uints[0];
|
||||
uint init0 = mix.uints[0];
|
||||
uint a = 0;
|
||||
do
|
||||
{
|
||||
uint pi = fnv(init0 ^ a, mix_val) % DAG_SIZE;
|
||||
uint n = (a+1) % countof(mix.uints);
|
||||
|
||||
#pragma unroll
|
||||
for (uint i = 0; i != countof(mix.uints); ++i)
|
||||
{
|
||||
mix.uints[i] = fnv(mix.uints[i], g_dag[pi].uints[i]);
|
||||
mix_val = i == n ? mix.uints[i] : mix_val;
|
||||
}
|
||||
}
|
||||
while (++a != (ACCESSES & isolate));
|
||||
|
||||
// reduce to output
|
||||
hash32_t fnv_mix;
|
||||
for (uint i = 0; i != countof(fnv_mix.uints); ++i)
|
||||
{
|
||||
fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]);
|
||||
}
|
||||
|
||||
return final_hash(&init, &fnv_mix, isolate);
|
||||
}
|
||||
|
||||
typedef union
|
||||
{
|
||||
struct
|
||||
{
|
||||
hash64_t init;
|
||||
uint pad; // avoid lds bank conflicts
|
||||
};
|
||||
hash32_t mix;
|
||||
} compute_hash_share;
|
||||
|
||||
|
||||
hash32_t compute_hash(
|
||||
__local compute_hash_share* share,
|
||||
__constant hash32_t const* g_header,
|
||||
__global hash128_t const* g_dag,
|
||||
ulong nonce,
|
||||
uint isolate
|
||||
)
|
||||
{
|
||||
uint const gid = get_global_id(0);
|
||||
|
||||
// Compute one init hash per work item.
|
||||
hash64_t init = init_hash(g_header, nonce, isolate);
|
||||
|
||||
// Threads work together in this phase in groups of 8.
|
||||
uint const thread_id = gid % THREADS_PER_HASH;
|
||||
uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH;
|
||||
|
||||
hash32_t mix;
|
||||
uint i = 0;
|
||||
do
|
||||
{
|
||||
// share init with other threads
|
||||
if (i == thread_id)
|
||||
share[hash_id].init = init;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
uint thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uints, g_dag, isolate);
|
||||
|
||||
share[hash_id].mix.uints[thread_id] = thread_mix;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (i == thread_id)
|
||||
mix = share[hash_id].mix;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
while (++i != (THREADS_PER_HASH & isolate));
|
||||
|
||||
return final_hash(&init, &mix, isolate);
|
||||
}
|
||||
|
||||
|
||||
// compute_hash_chunks computes one hash per work item. Work items that map to
// the same share[hash_id] slot cooperate through that slot, and the DAG is
// read through four separate buffers (g_dag, g_dag1, g_dag2, g_dag3) that are
// handed to inner_loop_chunks.
//
// share    - __local scratch indexed by hash_id; carries the init and mix
//            values exchanged between cooperating work items.
// g_header - header forwarded to init_hash.
// nonce    - nonce forwarded to init_hash.
// isolate  - forwarded to init_hash, inner_loop_chunks and final_hash; it
//            also masks the loop bound at the bottom of the do/while.
//
// Returns final_hash(&init, &mix, isolate).
hash32_t compute_hash_chunks(
|
||||
__local compute_hash_share* share,
|
||||
__constant hash32_t const* g_header,
|
||||
__global hash128_t const* g_dag,
|
||||
__global hash128_t const* g_dag1,
|
||||
__global hash128_t const* g_dag2,
|
||||
__global hash128_t const* g_dag3,
|
||||
ulong nonce,
|
||||
uint isolate
|
||||
)
|
||||
{
|
||||
uint const gid = get_global_id(0);
|
||||

||||
// Compute one init hash per work item.
|
||||
hash64_t init = init_hash(g_header, nonce, isolate);
|
||||

||||
// Threads work together in this phase in groups of 8.
|
||||
uint const thread_id = gid % THREADS_PER_HASH;
|
||||
uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH;
|
||||

||||
hash32_t mix;
|
||||
uint i = 0;
|
||||
// Round i: the work item whose thread_id equals i publishes its init, every
// work item of the slot processes a slice of it, and that same work item
// collects the combined mix at the end of the round.
do
|
||||
{
|
||||
// share init with other threads
|
||||
if (i == thread_id)
|
||||
share[hash_id].init = init;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||

||||
// Each work item picks its uint4 slice of the published init.
uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||

||||
uint thread_mix = inner_loop_chunks(thread_init, thread_id, share[hash_id].mix.uints, g_dag, g_dag1, g_dag2, g_dag3, isolate);
|
||||

||||
// Publish this work item's word of the mix so the round owner can copy it.
share[hash_id].mix.uints[thread_id] = thread_mix;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||

||||
if (i == thread_id)
|
||||
mix = share[hash_id].mix;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
// NOTE(review): the bound is THREADS_PER_HASH & isolate; presumably isolate is
// all ones so this equals THREADS_PER_HASH - confirm against the host code.
while (++i != (THREADS_PER_HASH & isolate));
|
||||

||||
return final_hash(&init, &mix, isolate);
|
||||
}
|
||||
|
||||
// ethash_hash_simple writes one hash per work item: work item gid stores
// compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate) in
// g_hashes[gid]. The work-group size is fixed to GROUP_SIZE x 1 x 1.
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
|
||||
__kernel void ethash_hash_simple(
|
||||
__global hash32_t* g_hashes,
|
||||
__constant hash32_t const* g_header,
|
||||
__global hash128_t const* g_dag,
|
||||
ulong start_nonce,
|
||||
uint isolate
|
||||
)
|
||||
{
|
||||
uint const gid = get_global_id(0);
|
||||
g_hashes[gid] = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
|
||||
}
|
||||
|
||||
// ethash_search_simple hashes nonce start_nonce + gid with compute_hash_simple
// and, when the hash passes the target test, records gid in g_output.
//
// g_output[0] is used as a hit counter (bumped with atomic_inc); the gid of a
// hit is stored at slot min(MAX_OUTPUTS, previous count + 1), so once the
// counter passes MAX_OUTPUTS further hits all land in slot MAX_OUTPUTS.
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
|
||||
__kernel void ethash_search_simple(
|
||||
__global volatile uint* restrict g_output,
|
||||
__constant hash32_t const* g_header,
|
||||
__global hash128_t const* g_dag,
|
||||
ulong start_nonce,
|
||||
ulong target,
|
||||
uint isolate
|
||||
)
|
||||
{
|
||||
uint const gid = get_global_id(0);
|
||||
hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
|
||||

||||
// NOTE(review): this compares the last ulong of the hash as-is, whereas
// ethash_search compares the byte-reversed first ulong - confirm which of the
// two matches the host-side target encoding.
if (hash.ulongs[countof(hash.ulongs)-1] < target)
|
||||
{
|
||||
uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1));
|
||||
g_output[slot] = gid;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ethash_hash writes one hash per work item: work item gid stores
// compute_hash(share, g_header, g_dag, start_nonce + gid, isolate) in
// g_hashes[gid]. share is a __local array of HASHES_PER_LOOP entries that
// compute_hash uses as scratch. The work-group size is fixed to
// GROUP_SIZE x 1 x 1.
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
|
||||
__kernel void ethash_hash(
|
||||
__global hash32_t* g_hashes,
|
||||
__constant hash32_t const* g_header,
|
||||
__global hash128_t const* g_dag,
|
||||
ulong start_nonce,
|
||||
uint isolate
|
||||
)
|
||||
{
|
||||
__local compute_hash_share share[HASHES_PER_LOOP];
|
||||

||||
uint const gid = get_global_id(0);
|
||||
g_hashes[gid] = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
|
||||
}
|
||||
|
||||
// ethash_search hashes nonce start_nonce + gid with compute_hash and, when the
// hash passes the target test, records gid in g_output.
//
// g_output[0] is used as a hit counter (bumped with atomic_inc); the gid of a
// hit is stored at slot min(MAX_OUTPUTS, previous count + 1), so once the
// counter passes MAX_OUTPUTS further hits all land in slot MAX_OUTPUTS.
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
|
||||
__kernel void ethash_search(
|
||||
__global volatile uint* restrict g_output,
|
||||
__constant hash32_t const* g_header,
|
||||
__global hash128_t const* g_dag,
|
||||
ulong start_nonce,
|
||||
ulong target,
|
||||
uint isolate
|
||||
)
|
||||
{
|
||||
__local compute_hash_share share[HASHES_PER_LOOP];
|
||||

||||
uint const gid = get_global_id(0);
|
||||
hash32_t hash = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
|
||||

||||
// The s76543210 swizzle reverses the eight bytes of the first ulong before
// it is compared against target.
if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target)
|
||||
{
|
||||
uint slot = min((uint)MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
|
||||
g_output[slot] = gid;
|
||||
}
|
||||
}
|
||||
|
||||
// ethash_hash_chunks writes one hash per work item, like ethash_hash, but
// takes the DAG as four separate buffers (g_dag, g_dag1, g_dag2, g_dag3) and
// computes the hash with compute_hash_chunks. Work item gid hashes nonce
// start_nonce + gid and stores the result in g_hashes[gid].
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
|
||||
__kernel void ethash_hash_chunks(
|
||||
__global hash32_t* g_hashes,
|
||||
__constant hash32_t const* g_header,
|
||||
__global hash128_t const* g_dag,
|
||||
__global hash128_t const* g_dag1,
|
||||
__global hash128_t const* g_dag2,
|
||||
__global hash128_t const* g_dag3,
|
||||
ulong start_nonce,
|
||||
uint isolate
|
||||
)
|
||||
{
|
||||
__local compute_hash_share share[HASHES_PER_LOOP];
|
||||

||||
uint const gid = get_global_id(0);
|
||||
g_hashes[gid] = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3,start_nonce + gid, isolate);
|
||||
}
|
||||
|
||||
// ethash_search_chunks hashes nonce start_nonce + gid with compute_hash_chunks
// (DAG supplied as four buffers) and, when the hash passes the target test,
// records gid in g_output.
//
// g_output[0] is used as a hit counter (bumped with atomic_inc); the gid of a
// hit is stored at slot min(MAX_OUTPUTS, previous count + 1), so once the
// counter passes MAX_OUTPUTS further hits all land in slot MAX_OUTPUTS.
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
|
||||
__kernel void ethash_search_chunks(
|
||||
__global volatile uint* restrict g_output,
|
||||
__constant hash32_t const* g_header,
|
||||
__global hash128_t const* g_dag,
|
||||
__global hash128_t const* g_dag1,
|
||||
__global hash128_t const* g_dag2,
|
||||
__global hash128_t const* g_dag3,
|
||||
ulong start_nonce,
|
||||
ulong target,
|
||||
uint isolate
|
||||
)
|
||||
{
|
||||
__local compute_hash_share share[HASHES_PER_LOOP];
|
||||

||||
uint const gid = get_global_id(0);
|
||||
hash32_t hash = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3, start_nonce + gid, isolate);
|
||||

||||
// The s76543210 swizzle reverses the eight bytes of the first ulong before
// it is compared against target.
if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target)
|
||||
{
|
||||
uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1));
|
||||
g_output[slot] = gid;
|
||||
}
|
||||
}
|
||||
`
|
|
@ -92,7 +92,7 @@ func TestEthashConcurrentVerify(t *testing.T) {
|
|||
defer os.RemoveAll(eth.Full.Dir)
|
||||
|
||||
block := &testBlock{difficulty: big.NewInt(10)}
|
||||
nonce, md := eth.Search(block, nil)
|
||||
nonce, md := eth.Search(block, nil, 0)
|
||||
block.nonce = nonce
|
||||
block.mixDigest = common.BytesToHash(md)
|
||||
|
||||
|
@ -135,7 +135,7 @@ func TestEthashConcurrentSearch(t *testing.T) {
|
|||
// launch n searches concurrently.
|
||||
for i := 0; i < nsearch; i++ {
|
||||
go func() {
|
||||
nonce, md := eth.Search(block, stop)
|
||||
nonce, md := eth.Search(block, stop, 0)
|
||||
select {
|
||||
case found <- searchRes{n: nonce, md: md}:
|
||||
case <-stop:
|
||||
|
@ -167,7 +167,7 @@ func TestEthashSearchAcrossEpoch(t *testing.T) {
|
|||
for i := epochLength - 40; i < epochLength+40; i++ {
|
||||
block := &testBlock{number: i, difficulty: big.NewInt(90)}
|
||||
rand.Read(block.hashNoNonce[:])
|
||||
nonce, md := eth.Search(block, nil)
|
||||
nonce, md := eth.Search(block, nil, 0)
|
||||
block.nonce = nonce
|
||||
block.mixDigest = common.BytesToHash(md)
|
||||
if !eth.Verify(block) {
|
||||
|
|
6
Makefile
6
Makefile
|
@ -6,7 +6,7 @@
|
|||
GOBIN = build/bin
|
||||
|
||||
geth:
|
||||
build/env.sh go install -v $(shell build/ldflags.sh) ./cmd/geth
|
||||
build/env.sh go install -v $(shell build/flags.sh) ./cmd/geth
|
||||
@echo "Done building."
|
||||
@echo "Run \"$(GOBIN)/geth\" to launch geth."
|
||||
|
||||
|
@ -39,12 +39,12 @@ evm:
|
|||
@echo "Done building."
|
||||
@echo "Run \"$(GOBIN)/evm to start the evm."
|
||||
mist:
|
||||
build/env.sh go install -v $(shell build/ldflags.sh) ./cmd/mist
|
||||
build/env.sh go install -v $(shell build/flags.sh) ./cmd/mist
|
||||
@echo "Done building."
|
||||
@echo "Run \"$(GOBIN)/mist --asset_path=cmd/mist/assets\" to launch mist."
|
||||
|
||||
all:
|
||||
build/env.sh go install -v $(shell build/ldflags.sh) ./...
|
||||
build/env.sh go install -v $(shell build/flags.sh) ./...
|
||||
|
||||
test: all
|
||||
build/env.sh go test ./...
|
||||
|
|
|
@ -16,3 +16,7 @@ sep=$(go version | awk '{ if ($3 >= "go1.5" || index($3, "devel")) print "="; el
|
|||
if [ -f ".git/HEAD" ]; then
|
||||
echo "-ldflags '-X main.gitCommit$sep$(git rev-parse HEAD)'"
|
||||
fi
|
||||
|
||||
if [ ! -z "$GO_OPENCL" ]; then
|
||||
echo "-tags opencl"
|
||||
fi
|
|
@ -468,8 +468,7 @@ func processTxs(repl *testjethre, t *testing.T, expTxc int) bool {
|
|||
t.Errorf("incorrect number of pending transactions, expected %v, got %v", expTxc, txc)
|
||||
return false
|
||||
}
|
||||
|
||||
err = repl.ethereum.StartMining(runtime.NumCPU())
|
||||
err = repl.ethereum.StartMining(runtime.NumCPU(), "")
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error mining: %v", err)
|
||||
return false
|
||||
|
|
|
@ -104,6 +104,22 @@ The makedag command generates an ethash DAG in /tmp/dag.
|
|||
|
||||
This command exists to support the system testing project.
|
||||
Regular users do not need to execute it.
|
||||
`,
|
||||
},
|
||||
{
|
||||
Action: gpuinfo,
|
||||
Name: "gpuinfo",
|
||||
Usage: "gpuinfo",
|
||||
Description: `
|
||||
Prints OpenCL device info for all found GPUs.
|
||||
`,
|
||||
},
|
||||
{
|
||||
Action: gpubench,
|
||||
Name: "gpubench",
|
||||
Usage: "benchmark GPU",
|
||||
Description: `
|
||||
Runs quick benchmark on first GPU found.
|
||||
`,
|
||||
},
|
||||
{
|
||||
|
@ -298,6 +314,7 @@ JavaScript API. See https://github.com/ethereum/go-ethereum/wiki/Javascipt-Conso
|
|||
utils.GasPriceFlag,
|
||||
utils.MinerThreadsFlag,
|
||||
utils.MiningEnabledFlag,
|
||||
utils.MiningGPUFlag,
|
||||
utils.AutoDAGFlag,
|
||||
utils.NATFlag,
|
||||
utils.NatspecEnabledFlag,
|
||||
|
@ -586,7 +603,10 @@ func startEth(ctx *cli.Context, eth *eth.Ethereum) {
|
|||
}
|
||||
}
|
||||
if ctx.GlobalBool(utils.MiningEnabledFlag.Name) {
|
||||
if err := eth.StartMining(ctx.GlobalInt(utils.MinerThreadsFlag.Name)); err != nil {
|
||||
err := eth.StartMining(
|
||||
ctx.GlobalInt(utils.MinerThreadsFlag.Name),
|
||||
ctx.GlobalString(utils.MiningGPUFlag.Name))
|
||||
if err != nil {
|
||||
utils.Fatalf("%v", err)
|
||||
}
|
||||
}
|
||||
|
@ -740,6 +760,29 @@ func makedag(ctx *cli.Context) {
|
|||
}
|
||||
}
|
||||
|
||||
// gpuinfo is the action of the "gpuinfo" command; it delegates to
// eth.PrintOpenCLDevices and ignores ctx.
func gpuinfo(ctx *cli.Context) {
|
||||
eth.PrintOpenCLDevices()
|
||||
}
|
||||
|
||||
func gpubench(ctx *cli.Context) {
|
||||
args := ctx.Args()
|
||||
wrongArgs := func() {
|
||||
utils.Fatalf(`Usage: geth gpubench <gpu number>`)
|
||||
}
|
||||
switch {
|
||||
case len(args) == 1:
|
||||
n, err := strconv.ParseUint(args[0], 0, 64)
|
||||
if err != nil {
|
||||
wrongArgs()
|
||||
}
|
||||
eth.GPUBench(n)
|
||||
case len(args) == 0:
|
||||
eth.GPUBench(0)
|
||||
default:
|
||||
wrongArgs()
|
||||
}
|
||||
}
|
||||
|
||||
func version(c *cli.Context) {
|
||||
fmt.Println(ClientIdentifier)
|
||||
fmt.Println("Version:", Version)
|
||||
|
|
|
@ -155,6 +155,12 @@ var (
|
|||
}
|
||||
|
||||
// miner settings
|
||||
// TODO: refactor CPU vs GPU mining flags
|
||||
MiningGPUFlag = cli.StringFlag{
|
||||
Name: "minegpu",
|
||||
Usage: "Mine with given GPUs. '--minegpu 0,1' will mine with the first two GPUs found.",
|
||||
}
|
||||
|
||||
MinerThreadsFlag = cli.IntFlag{
|
||||
Name: "minerthreads",
|
||||
Usage: "Number of miner threads",
|
||||
|
|
|
@ -306,7 +306,7 @@ func processTxs(repl *testFrontend, t *testing.T, expTxc int) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
err = repl.ethereum.StartMining(runtime.NumCPU())
|
||||
err = repl.ethereum.StartMining(runtime.NumCPU(), "")
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error mining: %v", err)
|
||||
return false
|
||||
|
|
|
@ -32,7 +32,7 @@ import (
|
|||
// It returns true from Verify for any block.
|
||||
type FakePow struct{}
|
||||
|
||||
func (f FakePow) Search(block pow.Block, stop <-chan struct{}) (uint64, []byte) {
|
||||
func (f FakePow) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) {
|
||||
return 0, nil
|
||||
}
|
||||
func (f FakePow) Verify(block pow.Block) bool { return true }
|
||||
|
|
|
@ -34,7 +34,7 @@ type failPow struct {
|
|||
failing uint64
|
||||
}
|
||||
|
||||
func (pow failPow) Search(pow.Block, <-chan struct{}) (uint64, []byte) {
|
||||
func (pow failPow) Search(pow.Block, <-chan struct{}, int) (uint64, []byte) {
|
||||
return 0, nil
|
||||
}
|
||||
func (pow failPow) Verify(block pow.Block) bool { return block.NumberU64() != pow.failing }
|
||||
|
@ -47,7 +47,7 @@ type delayedPow struct {
|
|||
delay time.Duration
|
||||
}
|
||||
|
||||
func (pow delayedPow) Search(pow.Block, <-chan struct{}) (uint64, []byte) {
|
||||
func (pow delayedPow) Search(pow.Block, <-chan struct{}, int) (uint64, []byte) {
|
||||
return 0, nil
|
||||
}
|
||||
func (pow delayedPow) Verify(block pow.Block) bool { time.Sleep(pow.delay); return true }
|
||||
|
|
|
@ -498,18 +498,6 @@ func (s *Ethereum) ResetWithGenesisBlock(gb *types.Block) {
|
|||
s.blockchain.ResetWithGenesisBlock(gb)
|
||||
}
|
||||
|
||||
func (s *Ethereum) StartMining(threads int) error {
|
||||
eb, err := s.Etherbase()
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Cannot start mining without etherbase address: %v", err)
|
||||
glog.V(logger.Error).Infoln(err)
|
||||
return err
|
||||
}
|
||||
|
||||
go s.miner.Start(eb, threads)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Ethereum) Etherbase() (eb common.Address, err error) {
|
||||
eb = s.etherbase
|
||||
if (eb == common.Address{}) {
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright 2014 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// +build !opencl
|
||||
|
||||
package eth
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/ethereum/go-ethereum/logger"
|
||||
"github.com/ethereum/go-ethereum/logger/glog"
|
||||
)
|
||||
|
||||
const disabledInfo = "Set GO_OPENCL and re-build to enable."
|
||||
|
||||
func (s *Ethereum) StartMining(threads int, gpus string) error {
|
||||
eb, err := s.Etherbase()
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Cannot start mining without etherbase address: %v", err)
|
||||
glog.V(logger.Error).Infoln(err)
|
||||
return err
|
||||
}
|
||||
|
||||
if gpus != "" {
|
||||
return errors.New("GPU mining disabled. " + disabledInfo)
|
||||
}
|
||||
|
||||
// CPU mining
|
||||
go s.miner.Start(eb, threads)
|
||||
return nil
|
||||
}
|
||||
|
||||
func GPUBench(gpuid uint64) {
|
||||
fmt.Println("GPU mining disabled. " + disabledInfo)
|
||||
}
|
||||
|
||||
func PrintOpenCLDevices() {
|
||||
fmt.Println("OpenCL disabled. " + disabledInfo)
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
// Copyright 2014 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// +build opencl
|
||||
|
||||
package eth
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/big"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ethereum/ethash"
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/core/types"
|
||||
"github.com/ethereum/go-ethereum/logger"
|
||||
"github.com/ethereum/go-ethereum/logger/glog"
|
||||
"github.com/ethereum/go-ethereum/miner"
|
||||
)
|
||||
|
||||
func (s *Ethereum) StartMining(threads int, gpus string) error {
|
||||
eb, err := s.Etherbase()
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Cannot start mining without etherbase address: %v", err)
|
||||
glog.V(logger.Error).Infoln(err)
|
||||
return err
|
||||
}
|
||||
|
||||
// GPU mining
|
||||
if gpus != "" {
|
||||
var ids []int
|
||||
for _, s := range strings.Split(gpus, ",") {
|
||||
i, err := strconv.Atoi(s)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Invalid GPU id(s): %v", err)
|
||||
}
|
||||
if i < 0 {
|
||||
return fmt.Errorf("Invalid GPU id: %v", i)
|
||||
}
|
||||
ids = append(ids, i)
|
||||
}
|
||||
|
||||
// TODO: re-creating miner is a bit ugly
|
||||
cl := ethash.NewCL(ids)
|
||||
s.miner = miner.New(s, s.EventMux(), cl)
|
||||
go s.miner.Start(eb, len(ids))
|
||||
return nil
|
||||
}
|
||||
|
||||
// CPU mining
|
||||
go s.miner.Start(eb, threads)
|
||||
return nil
|
||||
}
|
||||
|
||||
func GPUBench(gpuid uint64) {
|
||||
e := ethash.NewCL([]int{int(gpuid)})
|
||||
|
||||
var h common.Hash
|
||||
bogoHeader := &types.Header{
|
||||
ParentHash: h,
|
||||
Number: big.NewInt(int64(42)),
|
||||
Difficulty: big.NewInt(int64(999999999999999)),
|
||||
}
|
||||
bogoBlock := types.NewBlock(bogoHeader, nil, nil, nil)
|
||||
|
||||
err := ethash.InitCL(bogoBlock.NumberU64(), e)
|
||||
if err != nil {
|
||||
fmt.Println("OpenCL init error: ", err)
|
||||
return
|
||||
}
|
||||
|
||||
stopChan := make(chan struct{})
|
||||
reportHashRate := func() {
|
||||
for {
|
||||
time.Sleep(3 * time.Second)
|
||||
fmt.Printf("hashes/s : %v\n", e.GetHashrate())
|
||||
}
|
||||
}
|
||||
fmt.Printf("Starting benchmark (%v seconds)\n", 60)
|
||||
go reportHashRate()
|
||||
go e.Search(bogoBlock, stopChan, 0)
|
||||
time.Sleep(60 * time.Second)
|
||||
fmt.Println("OK.")
|
||||
}
|
||||
|
||||
// PrintOpenCLDevices delegates to ethash.PrintDevices; it backs the "gpuinfo"
// command.
func PrintOpenCLDevices() {
|
||||
ethash.PrintDevices()
|
||||
}
|
|
@ -118,7 +118,7 @@ func (self *CpuAgent) mine(work *Work, stop <-chan struct{}) {
|
|||
glog.V(logger.Debug).Infof("(re)started agent[%d]. mining...\n", self.index)
|
||||
|
||||
// Mine
|
||||
nonce, mixDigest := self.pow.Search(work.Block, stop)
|
||||
nonce, mixDigest := self.pow.Search(work.Block, stop, self.index)
|
||||
if nonce != 0 {
|
||||
block := work.Block.WithMiningResult(nonce, common.BytesToHash(mixDigest))
|
||||
self.returnCh <- &Result{work, block}
|
||||
|
|
|
@ -48,7 +48,7 @@ func (pow *EasyPow) Turbo(on bool) {
|
|||
pow.turbo = on
|
||||
}
|
||||
|
||||
func (pow *EasyPow) Search(block pow.Block, stop <-chan struct{}) (uint64, []byte) {
|
||||
func (pow *EasyPow) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) {
|
||||
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
hash := block.HashNoNonce()
|
||||
diff := block.Difficulty()
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
package pow
|
||||
|
||||
type PoW interface {
|
||||
Search(block Block, stop <-chan struct{}) (uint64, []byte)
|
||||
Search(block Block, stop <-chan struct{}, index int) (uint64, []byte)
|
||||
Verify(block Block) bool
|
||||
GetHashrate() int64
|
||||
Turbo(bool)
|
||||
|
|
|
@ -100,7 +100,7 @@ func (self *minerApi) StartMiner(req *shared.Request) (interface{}, error) {
|
|||
}
|
||||
|
||||
self.ethereum.StartAutoDAG()
|
||||
err := self.ethereum.StartMining(args.Threads)
|
||||
err := self.ethereum.StartMining(args.Threads, "")
|
||||
if err == nil {
|
||||
return true, nil
|
||||
}
|
||||
|
|
|
@ -474,7 +474,7 @@ func (self *XEth) ClientVersion() string {
|
|||
func (self *XEth) SetMining(shouldmine bool, threads int) bool {
|
||||
ismining := self.backend.IsMining()
|
||||
if shouldmine && !ismining {
|
||||
err := self.backend.StartMining(threads)
|
||||
err := self.backend.StartMining(threads, "")
|
||||
return err == nil
|
||||
}
|
||||
if ismining && !shouldmine {
|
||||
|
|
Loading…
Reference in New Issue