#if 0 SRC=${0##*/} BIN=${SRC%.*} gcc $SRC -o $BIN -lm -lvulkan && ./$BIN && rm $BIN exit 0 #endif #include #include #include #include "compute_module.inl.h" typedef signed char i8; typedef signed short i16; typedef signed int i32; typedef signed long long i64; typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long long u64; typedef float f32; typedef double f64; typedef char c8; enum { MAX_DEVICE_COUNT = 16, MAX_QUEUE_FAMILY_COUNT = 64, BUFFER_SIZE = 64, GROUP_COUNT_X = 1, GROUP_COUNT_Y = 1, GROUP_COUNT_Z = 1, }; f32 queue_priorities[] = { 1.f, }; u32 queue_family_index = 0; VkInstance instance; VkPhysicalDevice physical_device; VkDevice device; VkQueue queue; VkBuffer buffer; VkDeviceMemory buffer_memory; VkDescriptorSetLayout descriptor_set_layout; VkDescriptorPool descriptor_pool; VkDescriptorSet descriptor_set; VkShaderModule compute_module; VkPipelineLayout pipeline_layout; VkPipeline pipeline; VkCommandPool command_pool; VkCommandBuffer command_buffer; i32 main(i32 argc, c8 **argv) { // Create instance // if (vkCreateInstance( &(VkInstanceCreateInfo) { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pApplicationInfo = &(VkApplicationInfo) { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .apiVersion = VK_API_VERSION_1_1, }, }, NULL, &instance) != VK_SUCCESS) { printf("vkCreateInstance failed.\n"); return -1; } // Find physical device // { u32 device_count = MAX_DEVICE_COUNT; VkPhysicalDevice devices[MAX_DEVICE_COUNT]; VkResult res = vkEnumeratePhysicalDevices(instance, &device_count, devices); if (res != VK_SUCCESS && res != VK_INCOMPLETE) { printf("vkEnumeratePhysicalDevices failed.\n"); return -1; } if (device_count <= 0) { printf("Compatible physical device not found.\n"); return -1; } u32 i = 0; VkPhysicalDeviceProperties properties; for (; i < device_count; ++i) { vkGetPhysicalDeviceProperties(devices[i], &properties); if (strstr(properties.deviceName, "NVIDIA") != NULL || strstr(properties.deviceName, "AMD") != NULL) { physical_device = devices[i]; break; } } if (i >= device_count) physical_device = devices[0]; vkGetPhysicalDeviceProperties(physical_device, &properties); printf("Physical device selected: %s\n", properties.deviceName); } // Find queue family // { u32 queue_family_count = MAX_QUEUE_FAMILY_COUNT; VkQueueFamilyProperties queue_families[MAX_QUEUE_FAMILY_COUNT]; vkGetPhysicalDeviceQueueFamilyProperties( physical_device, &queue_family_count, queue_families); u32 index = 0; for (; index < queue_family_count; ++index) if (queue_families[index].queueCount > 0 && (queue_families[index].queueFlags & VK_QUEUE_COMPUTE_BIT)) break; if (index >= queue_family_count) { printf("Compatible queue family not found.\n"); return -1; } queue_family_index = index; } // Create logical device // { VkPhysicalDeviceFeatures device_features; memset(&device_features, 0, sizeof device_features); if (vkCreateDevice( physical_device, &(VkDeviceCreateInfo) { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pQueueCreateInfos = &(VkDeviceQueueCreateInfo) { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .queueCount = 1, .pQueuePriorities = queue_priorities, .queueFamilyIndex = queue_family_index, }, .queueCreateInfoCount = 1, .pEnabledFeatures = &device_features, }, NULL, &device) != VK_SUCCESS) { printf("vkCreateDevice failed.\n"); return -1; } vkGetDeviceQueue(device, queue_family_index, 0, &queue); } // Create buffer // { if (vkCreateBuffer( device, &(VkBufferCreateInfo) { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .size = BUFFER_SIZE, .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }, NULL, &buffer) != VK_SUCCESS) { printf("vkCreateBuffer failed.\n"); return -1; } VkMemoryRequirements memory_requirements; VkPhysicalDeviceMemoryProperties memory_properties; vkGetBufferMemoryRequirements(device, buffer, &memory_requirements); vkGetPhysicalDeviceMemoryProperties(physical_device, &memory_properties); u32 i = 0; u32 properties = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; u32 memory_type_index = 0; for (; i < memory_properties.memoryTypeCount; ++i) if ((memory_requirements.memoryTypeBits & (1 << i)) && ((memory_properties.memoryTypes[i].propertyFlags & properties) == properties)) { memory_type_index = i; break; } if (i >= memory_properties.memoryTypeCount) { printf("Compatible memory properties not found.\n"); return -1; } if (vkAllocateMemory( device, &(VkMemoryAllocateInfo) { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = memory_requirements.size, .memoryTypeIndex = memory_type_index, }, NULL, &buffer_memory) != VK_SUCCESS) { printf("vkAllocateMemory failed.\n"); return -1; } if (vkBindBufferMemory(device, buffer, buffer_memory, 0) != VK_SUCCESS) { printf("vkBindBufferMemory failed.\n"); return -1; } } // Create descriptor set layout // if (vkCreateDescriptorSetLayout( device, &(VkDescriptorSetLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .bindingCount = 1, .pBindings = &(VkDescriptorSetLayoutBinding) { .binding = 0, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, }, }, NULL, &descriptor_set_layout) != VK_SUCCESS) { printf("vkCreateDescriptorSetLayout failed.\n"); return -1; } // Create descriptor set // { if (vkCreateDescriptorPool( device, &(VkDescriptorPoolCreateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .maxSets = 1, .poolSizeCount = 1, .pPoolSizes = &(VkDescriptorPoolSize) { .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, }, }, NULL, &descriptor_pool) != VK_SUCCESS) { printf("vkCreateDescriptorPool failed.\n"); return -1; } if (vkAllocateDescriptorSets( device, &(VkDescriptorSetAllocateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = descriptor_pool, .descriptorSetCount = 1, .pSetLayouts = &descriptor_set_layout, }, &descriptor_set) != VK_SUCCESS) { printf("vkAllocateDescriptorSets failed.\n"); return -1; } vkUpdateDescriptorSets( device, 1, &(VkWriteDescriptorSet) { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstSet = descriptor_set, .dstBinding = 0, .descriptorCount = 1, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .pBufferInfo = &(VkDescriptorBufferInfo) { .buffer = buffer, .offset = 0, .range = BUFFER_SIZE, }, }, 0, NULL); } // Create compute pipeline // { if (vkCreateShaderModule( device, &(VkShaderModuleCreateInfo) { .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pCode = COMPUTE_MODULE_CODE, .codeSize = sizeof COMPUTE_MODULE_CODE, }, NULL, &compute_module) != VK_SUCCESS) { printf("vkCreateShaderModule failed.\n"); return -1; } if (vkCreatePipelineLayout( device, &(VkPipelineLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, .pSetLayouts = &descriptor_set_layout, }, NULL, &pipeline_layout) != VK_SUCCESS) { printf("vkCreatePipelineLayout failed.\n"); return -1; } if (vkCreateComputePipelines( device, VK_NULL_HANDLE, 1, &(VkComputePipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = compute_module, .pName = "main", }, .layout = pipeline_layout, }, NULL, &pipeline ) != VK_SUCCESS) { printf("vkCreateComputePipelines failed.\n"); return 0; } } // Create command buffer // { if (vkCreateCommandPool( device, &(VkCommandPoolCreateInfo) { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .queueFamilyIndex = queue_family_index, }, NULL, &command_pool) != VK_SUCCESS) { printf("vkCreateCommandPool failed.\n"); return -1; } if (vkAllocateCommandBuffers( device, &(VkCommandBufferAllocateInfo) { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .commandPool = command_pool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .commandBufferCount = 1, }, &command_buffer) != VK_SUCCESS) { printf("vkAllocateCommandBuffers failed.\n"); return -1; } if (vkBeginCommandBuffer( command_buffer, &(VkCommandBufferBeginInfo) { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, }) != VK_SUCCESS) { printf("vkBeginCommandBuffer failed.\n"); return -1; } vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); vkCmdBindDescriptorSets( command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, 1, &descriptor_set, 0, NULL); vkCmdDispatch(command_buffer, GROUP_COUNT_X, GROUP_COUNT_Y, GROUP_COUNT_Z); if (vkEndCommandBuffer(command_buffer) != VK_SUCCESS) { printf("vkEndCommandBuffer failed.\n"); return -1; } } // Run command buffer // { VkFence fence; if (vkCreateFence( device, &(VkFenceCreateInfo) { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, }, NULL, &fence) != VK_SUCCESS) { printf("vkCreateFence failed.\n"); return -1; } if (vkQueueSubmit(queue, 1, &(VkSubmitInfo) { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .commandBufferCount = 1, .pCommandBuffers = &command_buffer, }, fence) != VK_SUCCESS) { printf("vkQueueSubmit failed.\n"); return -1; } if (vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000) != VK_SUCCESS) { printf("vkWaitForFences failed.\n"); return -1; } vkDestroyFence(device, fence, NULL); } // Read output buffer // { u32 *p = NULL; if (vkMapMemory(device, buffer_memory, 0, BUFFER_SIZE, 0, (void **) &p) != VK_SUCCESS) { printf("vkMapMemory failed.\n"); return -1; } printf("Data:"); for (u32 i = 0; i < BUFFER_SIZE / 4; i++) printf(" %2d", (int) p[i]); printf("\n"); vkUnmapMemory(device, buffer_memory); } // Cleanup // { vkFreeMemory(device, buffer_memory, NULL); vkDestroyBuffer(device, buffer, NULL); vkDestroyShaderModule(device, compute_module, NULL); vkDestroyDescriptorPool(device, descriptor_pool, NULL); vkDestroyDescriptorSetLayout(device, descriptor_set_layout, NULL); vkDestroyPipelineLayout(device, pipeline_layout, NULL); vkDestroyPipeline(device, pipeline, NULL); vkDestroyCommandPool(device, command_pool, NULL); vkDestroyDevice(device, NULL); vkDestroyInstance(instance, NULL); } printf("OK\n"); return 0; }