본문 바로가기
Openstack/vGPU & GPU

Kolla-ansible Openstack VGPU 설정 방법

by Miners1205 2023. 11. 27.
반응형

Kolla-ansible Openstack vGPU(Virtual GPU) 설정 방법


Kolla-ansible Openstack vGPU 사용을 위한 Openstack Host 환경 설정을 한다.

  • Openstack vGPU Host 환경 세팅

1. BIOS 세팅

### BIOS 기본 세팅 ###

VT-d on
VT-x on

 

 

2. Host OS 설치

### 지원 되는 OS 설치 ###

o 본 설정에 맞는 OS를 설치한다.

o Ubuntu, Rocky, CentOS 등

o 설치 버전 : Rocky 8

 

3. Host OS 환경 설정

### NVIDIA 그래픽 카드(GPU) 장치 인식 확인

[root@gpu01]# lspci -nn |grep NVIDIA
c1:00.0 3D controller [0101]: NVIDIA Corporation TU104GL [Tesla T4] [10de:1eb8] (rev a1)


[root@gpu01]# cat /etc/default/grub
GRUB_TIMEOUT=5
GRUB_DISTRIBUTOR="$(sed 's, release .*$,,g' /etc/system-release)"
GRUB_DEFAULT=saved
GRUB_DISABLE_SUBMENU=true
GRUB_TERMINAL_OUTPUT="console"
GRUB_CMDLINE_LINUX="crashkernel=auto resume=/dev/mapper/rl-swap rd.lvm.lv=rl/root rd.lvm.lv=rl/swap intel_iommu=on vfio-pci.ids=10de:1eb8"
GRUB_DISABLE_RECOVERY="true"
GRUB_ENABLE_BLSCFG=true


[root@gpu01]# grub2-mkconfig -o /boot/efi/EFI/rocky/grub.cfg

[root@gpu01]#  mv /boot/initramfs-$(uname -r).img /boot/initramfs-$(uname -r).img.bak

[root@gpu01]#  dracut -v /boot/initramfs-$(uname -r).img $(uname -r)

 

5. 서버 재기동

### blacklist 내용 추가
[root@gpu01]#  vim /etc/modprobe.d/blacklist.conf
blacklist nouveau



### 서버 재기동
[root@gpu01]# reboot

[root@gpu01]# lsmod | grep nouveau
-> 출력값 X

 

 

6. NVIDIA 패키지 설치

### NVIDIA VGPU 패키지 설치를 위한 사전 작업

[root@gpu01]#  dnf install make gcc -y

[root@gpu01]#  yum group install "Development Tools"

### 필요시 설치

[root@gpu01]# dnf install gcc*



### NVIDIA VGPU 설치시 해당 설정 요청에 따른 추가
[root@gpu01]#  cat nvidia-installer-disable-nouveau.conf
# generated by nvidia-installer
blacklist nouveau
options nouveau modeset=0


### NVIDIA 공식 홈페이지에서 vGPU 패키지 다운로드 및 설치

[root@gpu01]#  ./NVIDIA-Linux-x86_64-510.73.06-vgpu-kvm.run
 
[root@gpu01]#  nvidia-smi

[root@gpu01]#  reboot    ← reboot 해야 mdev_bus 가 보인다

[root@gpu01]#   ls /sys/class/mdev_bus/*/mdev_supported_types
nvidia-222  nvidia-224  nvidia-226  nvidia-228  nvidia-230  nvidia-232  nvidia-234  nvidia-319  nvidia-321
nvidia-223  nvidia-225  nvidia-227  nvidia-229  nvidia-231  nvidia-233  nvidia-252  nvidia-320 


### 위 디렉터리와 vGPU 타입 목록이 출력되면 vGPU 기본 환경 설정이 완료된 것이다.


 

 

 

7. Openstack Config 수정

Docker container 기반 Openstack Kolla-ansible 설치

####################################################################################   
### Controller 서버 nova-api, nova.conf 설정값 추가



[pci]
alias = { "vendor_id":"10de", "product_id":"1eb8", "device_type":"type-PF", "name":"T4" }
passthrough_whitelist = { "vendor_id":"10de", "product_id":"1eb8" }



### controller 서버  nova-scheduler nova.conf 설정 추가
[filter_scheduler]
enabled_filters = RetryFilter,AvailabilityZoneFilter,ComputeFilter,ComputeCapabilitiesFilter,ImagePropertiesFilter,ServerGroupAntiAffinityFilter,ServerGroupAffinityFilter,NUMATopologyFilter,AggregateInstanceExtraSpecsFilter,PciPassthroughFilter
available_filters = nova.scheduler.filters.all_filters

####################################################################################

### Nova Compute Node nova.conf 설정


[devices]
enabled_vgpu_types = nvidia-223

####################################################################################

[root@gpu01]#  docker restart nova_compute

 

Graphic Card : Tesla V100S
- Openstack 설정값 vGPU 사양 설명
1 nvidia-355 num_heads=4, frl_config=60, framebuffer=1024M, max_resolution=5120x2880, max_instance=32
2 nvidia-356 num_heads=4, frl_config=60, framebuffer=2048M, max_resolution=7680x4320, max_instance=16
3 nvidia-357 num_heads=4, frl_config=60, framebuffer=4096M, max_resolution=7680x4320, max_instance=8
4 nvidia-358 num_heads=4, frl_config=60, framebuffer=8192M, max_resolution=7680x4320, max_instance=4
5 nvidia-359 num_heads=4, frl_config=60, framebuffer=16384M, max_resolution=7680x4320, max_instance=2
6 nvidia-360 num_heads=4, frl_config=60, framebuffer=32768M, max_resolution=7680x4320, max_instance=1
7 nvidia-361 num_heads=1, frl_config=60, framebuffer=4096M, max_resolution=4096x2160, max_instance=8
8 nvidia-362 num_heads=1, frl_config=60, framebuffer=8192M, max_resolution=4096x2160, max_instance=4
9 nvidia-363 num_heads=1, frl_config=60, framebuffer=16384M, max_resolution=4096x2160, max_instance=2
10 nvidia-364 num_heads=1, frl_config=60, framebuffer=32768M, max_resolution=4096x2160, max_instance=1
11 nvidia-365 num_heads=1, frl_config=60, framebuffer=1024M, max_resolution=1280x1024, max_instance=32
12 nvidia-366 num_heads=1, frl_config=60, framebuffer=2048M, max_resolution=1280x1024, max_instance=16
13 nvidia-367 num_heads=1, frl_config=60, framebuffer=4096M, max_resolution=1280x1024, max_instance=8
14 nvidia-368 num_heads=1, frl_config=60, framebuffer=8192M, max_resolution=1280x1024, max_instance=4
15 nvidia-369 num_heads=1, frl_config=60, framebuffer=16384M, max_resolution=1280x1024, max_instance=2
16 nvidia-370 num_heads=1, frl_config=60, framebuffer=32768M, max_resolution=1280x1024, max_instance=1
17 nvidia-371 num_heads=4, frl_config=45, framebuffer=1024M, max_resolution=5120x2880, max_instance=32
18 nvidia-373 num_heads=4, frl_config=45, framebuffer=2048M, max_resolution=5120x2880, max_instance=16

 

Graphic Card : Tesla T4
- Openstack 설정값 vGPU 사양 설명
1 nvidia-222 num_heads=4, frl_config=45, framebuffer=1024M, max_resolution=5120x2880, max_instance=16
2 nvidia-223 num_heads=4, frl_config=45, framebuffer=2048M, max_resolution=5120x2880, max_instance=8
3 nvidia-224 num_heads=4, frl_config=45, framebuffer=2048M, max_resolution=5120x2880, max_instance=8
4 nvidia-225 num_heads=1, frl_config=60, framebuffer=1024M, max_resolution=1280x1024, max_instance=16
5 nvidia-226 num_heads=1, frl_config=60, framebuffer=2048M, max_resolution=1280x1024, max_instance=8
6 nvidia-227 num_heads=1, frl_config=60, framebuffer=4096M, max_resolution=1280x1024, max_instance=4
7 nvidia-228 num_heads=1, frl_config=60, framebuffer=8192M, max_resolution=1280x1024, max_instance=2
8 nvidia-229 num_heads=1, frl_config=60, framebuffer=16384M, max_resolution=1280x1024, max_instance=1
9 nvidia-230 num_heads=4, frl_config=60, framebuffer=1024M, max_resolution=5120x2880, max_instance=16
10 nvidia-231 num_heads=4, frl_config=60, framebuffer=2048M, max_resolution=7680x4320, max_instance=8
11 nvidia-232 num_heads=4, frl_config=60, framebuffer=4096M, max_resolution=7680x4320, max_instance=4
12 nvidia-233 num_heads=4, frl_config=60, framebuffer=8192M, max_resolution=7680x4320, max_instance=2
13 nvidia-234 num_heads=4, frl_config=60, framebuffer=16384M, max_resolution=7680x4320, max_instance=1
14 nvidia-252 num_heads=4, frl_config=45, framebuffer=1024M, max_resolution=5120x2880, max_instance=16
15 nvidia-319 num_heads=1, frl_config=60, framebuffer=4096M, max_resolution=4096x2160, max_instance=4
16 nvidia-320 num_heads=1, frl_config=60, framebuffer=8192M, max_resolution=4096x2160, max_instance=2
17 nvidia-321 num_heads=1, frl_config=60, framebuffer=16384M, max_resolution=4096x2160, max_instance=1

8. Openstack Flavor 생성

### vGPU Flavor 생성, 설정
[root@controller01]#  openstack flavor create --vcpus 2 --ram 2048 --disk 100 vgputest01
[root@controller01]#  openstack flavor set vgputest01 --property "resources:VGPU=1"

  • vgputest01 사양(Flavor)으로 생성한 인스턴스는 vGPU를 1개씩 사용함.
  • 위 설정대로 nvidia-223 세팅시 해당 사양으로 인스턴스를 최대 8개까지 생성 가능 (Tesla T4 기준, max_instance=8)

 

 

GPU & vGPU 참고 사이트

 

반응형