#if 0 SRC=${0##*/} BIN=${SRC%.*} gcc $SRC -o $BIN -lm -lvulkan && ./$BIN && rm $BIN exit 0 #endif #include #include #include #include "compute_module.inl.h" typedef signed char i8; typedef signed short i16; typedef signed int i32; typedef signed long long i64; typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long long u64; typedef float f32; typedef double f64; typedef signed char c8; enum { MAX_DEVICE_COUNT = 16, MAX_QUEUE_FAMILY_COUNT = 64, BUFFER_SIZE = 64, GROUP_COUNT_X = 1, GROUP_COUNT_Y = 1, GROUP_COUNT_Z = 1, }; f32 queue_priorities[] = { 1.f, }; u32 queue_family_index = 0; VkInstance instance; VkPhysicalDevice physical_device; VkDevice device; VkQueue queue; VkBuffer buffer; VkDeviceMemory buffer_memory; VkDescriptorSetLayout descriptor_set_layout; VkDescriptorPool descriptor_pool; VkDescriptorSet descriptor_set; VkShaderModule compute_module; VkPipelineLayout pipeline_layout; VkPipeline pipeline; VkCommandPool command_pool; VkCommandBuffer command_buffer; int main(int argc, char **argv) { // Create instance // { VkApplicationInfo info_application = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .apiVersion = VK_API_VERSION_1_1, }; VkInstanceCreateInfo info_instance_create = { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pApplicationInfo = &info_application, }; if (vkCreateInstance(&info_instance_create, NULL, &instance) != VK_SUCCESS) { printf("vkCreateInstance failed.\n"); return -1; } } // Find physical device // { u32 device_count = MAX_DEVICE_COUNT; VkPhysicalDevice devices[MAX_DEVICE_COUNT]; VkResult res = vkEnumeratePhysicalDevices(instance, &device_count, devices); if (res != VK_SUCCESS && res != VK_INCOMPLETE) { printf("vkEnumeratePhysicalDevices failed.\n"); return -1; } if (device_count <= 0) { printf("Compatible physical device not found.\n"); return -1; } u32 i = 0; VkPhysicalDeviceProperties properties; for (; i < device_count; ++i) { vkGetPhysicalDeviceProperties(devices[i], &properties); if (strstr(properties.deviceName, "NVIDIA") != NULL || strstr(properties.deviceName, "AMD") != NULL) { physical_device = devices[i]; break; } } if (i >= device_count) physical_device = devices[0]; vkGetPhysicalDeviceProperties(physical_device, &properties); printf("Physical device selected: %s\n", properties.deviceName); } // Find queue family // { u32 queue_family_count = MAX_QUEUE_FAMILY_COUNT; VkQueueFamilyProperties queue_families[MAX_QUEUE_FAMILY_COUNT]; vkGetPhysicalDeviceQueueFamilyProperties( physical_device, &queue_family_count, queue_families); u32 index = 0; for (; index < queue_family_count; ++index) if (queue_families[index].queueCount > 0 && (queue_families[index].queueFlags & VK_QUEUE_COMPUTE_BIT)) break; if (index >= queue_family_count) { printf("Compatible queue family not found.\n"); return -1; } queue_family_index = index; } // Create logical device // { VkDeviceQueueCreateInfo info_queue_create = { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .queueCount = 1, .pQueuePriorities = queue_priorities, .queueFamilyIndex = queue_family_index, }; VkPhysicalDeviceFeatures device_features; memset(&device_features, 0, sizeof device_features); VkDeviceCreateInfo info_device_create = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pQueueCreateInfos = &info_queue_create, .queueCreateInfoCount = 1, .pEnabledFeatures = &device_features, }; if (vkCreateDevice(physical_device, &info_device_create, NULL, &device) != VK_SUCCESS) { printf("vkCreateDevice failed.\n"); return -1; } vkGetDeviceQueue(device, queue_family_index, 0, &queue); } // Create buffer // { VkBufferCreateInfo info_buffer_create = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .size = BUFFER_SIZE, .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }; if (vkCreateBuffer(device, &info_buffer_create, NULL, &buffer) != VK_SUCCESS) { printf("vkCreateBuffer failed.\n"); return -1; } VkMemoryRequirements memory_requirements; VkPhysicalDeviceMemoryProperties memory_properties; vkGetBufferMemoryRequirements(device, buffer, &memory_requirements); VkMemoryAllocateInfo info_allocate = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = memory_requirements.size, }; vkGetPhysicalDeviceMemoryProperties(physical_device, &memory_properties); u32 i = 0; u32 properties = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; for (; i < memory_properties.memoryTypeCount; ++i) if ((memory_requirements.memoryTypeBits & (1 << i)) && ((memory_properties.memoryTypes[i].propertyFlags & properties) == properties)) { info_allocate.memoryTypeIndex = i; break; } if (i >= memory_properties.memoryTypeCount) { printf("Compatible memory properties not found.\n"); return -1; } if (vkAllocateMemory(device, &info_allocate, NULL, &buffer_memory) != VK_SUCCESS) { printf("vkAllocateMemory failed.\n"); return -1; } if (vkBindBufferMemory(device, buffer, buffer_memory, 0) != VK_SUCCESS) { printf("vkBindBufferMemory failed.\n"); return -1; } } // Create descriptor set layout // { VkDescriptorSetLayoutBinding descriptor_set_layout_binding = { .binding = 0, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, }; VkDescriptorSetLayoutCreateInfo info_descriptor_set_layout_create = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .bindingCount = 1, .pBindings = &descriptor_set_layout_binding, }; if (vkCreateDescriptorSetLayout( device, &info_descriptor_set_layout_create, NULL, &descriptor_set_layout) != VK_SUCCESS) { printf("vkCreateDescriptorSetLayout failed.\n"); return -1; } } // Create descriptor set // { VkDescriptorPoolSize descriptor_pool_size = { .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, }; VkDescriptorPoolCreateInfo info_descriptor_pool_create = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .maxSets = 1, .poolSizeCount = 1, .pPoolSizes = &descriptor_pool_size, }; if (vkCreateDescriptorPool(device, &info_descriptor_pool_create, NULL, &descriptor_pool) != VK_SUCCESS) { printf("vkCreateDescriptorPool failed.\n"); return -1; } VkDescriptorSetAllocateInfo info_descriptor_set_allocate = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = descriptor_pool, .descriptorSetCount = 1, .pSetLayouts = &descriptor_set_layout, }; if (vkAllocateDescriptorSets(device, &info_descriptor_set_allocate, &descriptor_set) != VK_SUCCESS) { printf("vkAllocateDescriptorSets failed.\n"); return -1; } VkDescriptorBufferInfo info_descriptor_buffer = { .buffer = buffer, .offset = 0, .range = BUFFER_SIZE, }; VkWriteDescriptorSet write_descriptor_set = { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstSet = descriptor_set, .dstBinding = 0, .descriptorCount = 1, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .pBufferInfo = &info_descriptor_buffer, }; vkUpdateDescriptorSets(device, 1, &write_descriptor_set, 0, NULL); } // Create compute pipeline // { VkShaderModuleCreateInfo info_module_create = { .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pCode = COMPUTE_MODULE_CODE, .codeSize = sizeof COMPUTE_MODULE_CODE, }; if (vkCreateShaderModule(device, &info_module_create, NULL, &compute_module) != VK_SUCCESS) { printf("vkCreateShaderModule failed.\n"); return -1; } VkPipelineShaderStageCreateInfo info_shader_stage_create = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = compute_module, .pName = "main", }; VkPipelineLayoutCreateInfo info_pipeline_layout_create = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, .pSetLayouts = &descriptor_set_layout, }; if (vkCreatePipelineLayout(device, &info_pipeline_layout_create, NULL, &pipeline_layout) != VK_SUCCESS) { printf("vkCreatePipelineLayout failed.\n"); return -1; } VkComputePipelineCreateInfo info_pipeline_create = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = info_shader_stage_create, .layout = pipeline_layout, }; if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &info_pipeline_create, NULL, &pipeline)) { printf("vkCreateComputePipelines failed.\n"); return 0; } } // Create command buffer // { VkCommandPoolCreateInfo info_command_pool_create = { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .queueFamilyIndex = queue_family_index, }; if (vkCreateCommandPool(device, &info_command_pool_create, NULL, &command_pool) != VK_SUCCESS) { printf("vkCreateCommandPool failed.\n"); return -1; } VkCommandBufferAllocateInfo info_command_buffer_allocate = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .commandPool = command_pool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .commandBufferCount = 1, }; if (vkAllocateCommandBuffers(device, &info_command_buffer_allocate, &command_buffer) != VK_SUCCESS) { printf("vkAllocateCommandBuffers failed.\n"); return -1; } VkCommandBufferBeginInfo info_begin = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, }; if (vkBeginCommandBuffer(command_buffer, &info_begin) != VK_SUCCESS) { printf("vkBeginCommandBuffer failed.\n"); return -1; } vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); vkCmdBindDescriptorSets( command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, 1, &descriptor_set, 0, NULL); vkCmdDispatch(command_buffer, GROUP_COUNT_X, GROUP_COUNT_Y, GROUP_COUNT_Z); if (vkEndCommandBuffer(command_buffer) != VK_SUCCESS) { printf("vkEndCommandBuffer failed.\n"); return -1; } } // Run command buffer // { VkSubmitInfo info_submit = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .commandBufferCount = 1, .pCommandBuffers = &command_buffer, }; VkFenceCreateInfo info_fence_create = { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, }; VkFence fence; if (vkCreateFence(device, &info_fence_create, NULL, &fence) != VK_SUCCESS) { printf("vkCreateFence failed.\n"); return -1; } if (vkQueueSubmit(queue, 1, &info_submit, fence) != VK_SUCCESS) { printf("vkQueueSubmit failed.\n"); return -1; } if (vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000) != VK_SUCCESS) { printf("vkWaitForFences failed.\n"); return -1; } vkDestroyFence(device, fence, NULL); } // Read output buffer // { u32 *p = NULL; if (vkMapMemory(device, buffer_memory, 0, BUFFER_SIZE, 0, (void **) &p) != VK_SUCCESS) { printf("vkMapMemory failed.\n"); return -1; } printf("Data:"); for (u32 i = 0; i < BUFFER_SIZE / 4; i++) printf(" %2d", (int) p[i]); printf("\n"); vkUnmapMemory(device, buffer_memory); } // Cleanup // { vkFreeMemory(device, buffer_memory, NULL); vkDestroyBuffer(device, buffer, NULL); vkDestroyShaderModule(device, compute_module, NULL); vkDestroyDescriptorPool(device, descriptor_pool, NULL); vkDestroyDescriptorSetLayout(device, descriptor_set_layout, NULL); vkDestroyPipelineLayout(device, pipeline_layout, NULL); vkDestroyPipeline(device, pipeline, NULL); vkDestroyCommandPool(device, command_pool, NULL); vkDestroyDevice(device, NULL); vkDestroyInstance(instance, NULL); } printf("OK\n"); return 0; }