almost done readme

main
Brett 2023-04-05 00:47:58 -04:00
parent 587f7909fa
commit 819549b8c8
15 changed files with 347 additions and 39 deletions

31
readme/references.bib Normal file
View File

@ -0,0 +1,31 @@
@misc{computeshader,
title="{OpenGL Compute Shaders}",
author="Mike Bailey",
year="2021",
howpublished={\url{https://web.engr.oregonstate.edu/~mjb/cs519/Handouts/compute.shader.2pp.pdf}},
note={Accessed: 2023-04-01}
},
@misc{glref,
title="{OpenGL Reference Manual}",
author="Khronos Group",
year="2014",
howpublished={\url{https://registry.khronos.org/OpenGL-Refpages/gl4/}},
},
@misc{gpuparticles,
title="{Compute-Based GPU Particle Systems}",
author="Gareth Thomas",
year="2014",
howpublished={\url{https://ubm-twvideo01.s3.amazonaws.com/o1/vault/GDC2014/Presentations/Gareth_Thomas_Compute-based_GPU_Particle.pdf}},
},
@misc{geometry,
title="{Particle Billboarding with the Geometry Shader}",
author="JeGX",
year="2014",
howpublished={\url{https://www.geeks3d.com/20140815/particle-billboarding-with-the-geometry-shader-glsl/}},
},
@misc{amdprogram,
title="{ATI Radeon HD 2000 programming guide}",
author="Emil Persson",
year="2007",
howpublished={\url{https://web.archive.org/web/20160722164341/http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/ATI_Radeon_HD_2000_programming_guide.pdf}},
}

BIN
readme/screenshot003.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 906 KiB

BIN
readme/screenshot004.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

BIN
readme/screenshot005.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 466 KiB

View File

@ -28,7 +28,40 @@
\@writefile{lof}{\addvspace {10\p@ }} \@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }}
\newlabel{chap:hp}{{2}{4}{Performance Mode}{chapter.2}{}} \newlabel{chap:hp}{{2}{4}{Performance Mode}{chapter.2}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.0.1}Design}{4}{subsection.2.0.1}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {2.1}Design}{4}{section.2.1}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {2.1}{\ignorespaces }}{5}{figure.2.1}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {2.2}Renderer}{4}{section.2.2}\protected@file@percent }
\newlabel{fig:screenshot002}{{2.1}{5}{}{figure.2.1}{}} \citation{amdprogram}
\gdef \@abspage@last{7} \@writefile{toc}{\contentsline {subsection}{\numberline {2.2.1}Rendering Pipeline}{5}{subsection.2.2.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{Vertex Shader}{5}{subsubsection*.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{Geometry Shader}{5}{subsubsection*.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{Fragment Shader}{5}{subsubsection*.4}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2.3}Compute Shader}{5}{section.2.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{Direction Offseting}{5}{subsubsection*.5}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2.4}Usage}{6}{section.2.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.4.1}Building}{6}{subsection.2.4.1}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {2.1}{\ignorespaces Linux build commands.}}{6}{figure.2.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.4.2}Running}{6}{subsection.2.4.2}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2.5}Future Plans}{6}{section.2.5}\protected@file@percent }
\newlabel{sec:fp}{{2.5}{6}{Future Plans}{section.2.5}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.5.1}Lists}{6}{subsection.2.5.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.5.2}Bitonic Sort}{7}{subsection.2.5.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.5.3}Tiling}{7}{subsection.2.5.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.5.4}Occlusion Queries}{7}{subsection.2.5.4}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2.6}Figures}{7}{section.2.6}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {2.2}{\ignorespaces }}{7}{figure.2.2}\protected@file@percent }
\newlabel{fig:screenshot002}{{2.2}{7}{}{figure.2.2}{}}
\citation{*}
\bibstyle{plain}
\bibdata{references.bib}
\@writefile{lof}{\contentsline {figure}{\numberline {2.3}{\ignorespaces 20 million particles on the new renderer}}{8}{figure.2.3}\protected@file@percent }
\newlabel{fig:newrender}{{2.3}{8}{20 million particles on the new renderer}{figure.2.3}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2.4}{\ignorespaces 6.4 million particles, fillrate (not compute) limited.}}{9}{figure.2.4}\protected@file@percent }
\newlabel{fig:phyiscsrend}{{2.4}{9}{6.4 million particles, fillrate (not compute) limited}{figure.2.4}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2.5}{\ignorespaces 6.4 million particles, zoomed out, showing fillrate as the limiting factor in speed.}}{10}{figure.2.5}\protected@file@percent }
\newlabel{fig:phyiscsrendfill}{{2.5}{10}{6.4 million particles, zoomed out, showing fillrate as the limiting factor in speed}{figure.2.5}{}}
\bibcite{computeshader}{1}
\bibcite{glref}{2}
\bibcite{geometry}{3}
\bibcite{amdprogram}{4}
\bibcite{gpuparticles}{5}
\gdef \@abspage@last{13}

View File

@ -0,0 +1,37 @@
\begin{thebibliography}{1}
\bibitem{computeshader}
Mike Bailey.
\newblock {OpenGL Compute Shaders}.
\newblock
\url{"https://web.engr.oregonstate.edu/~mjb/cs519/Handouts/compute.shader.2pp.pdf"},
2021.
\newblock Accessed: 2023-04-01.
\bibitem{glref}
Krnonos Group.
\newblock {OpenGL Reference Manual}.
\newblock \url{"https://registry.khronos.org/OpenGL-Refpages/gl4/"}, 2014.
\bibitem{geometry}
JeGX.
\newblock {Particle Billboarding with the Geometry Shader}.
\newblock
\url{"https://www.geeks3d.com/20140815/particle-billboarding-with-the-geometry-shader-glsl/"},
2014.
\bibitem{amdprogram}
Emil Persson.
\newblock {ATI Radeon HD 2000 programming guide}.
\newblock
\url{"https://web.archive.org/web/20160722164341/http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/ATI_Radeon_HD_2000_programming_guide.pdf"},
2007.
\bibitem{gpuparticles}
Gareth Thomas.
\newblock {Compute-Based GPU Particle Systems}.
\newblock
\url{"https://ubm-twvideo01.s3.amazonaws.com/o1/vault/GDC2014/Presentations/Gareth_Thomas_Compute-based_GPU_Particle.pdf"},
2014.
\end{thebibliography}

View File

@ -0,0 +1,46 @@
This is BibTeX, Version 0.99d (TeX Live 2022/Debian)
Capacity: max_strings=200000, hash_size=200000, hash_prime=170003
The top-level auxiliary file: template_Report.aux
The style file: plain.bst
Database file #1: references.bib
You've used 5 entries,
2118 wiz_defined-function locations,
521 strings with 4830 characters,
and the built_in function-call counts, 1069 in all, are:
= -- 102
> -- 35
< -- 0
+ -- 15
- -- 10
* -- 32
:= -- 186
add.period$ -- 16
call.type$ -- 5
change.case$ -- 20
chr.to.int$ -- 0
cite$ -- 5
duplicate$ -- 35
empty$ -- 111
format.name$ -- 10
if$ -- 233
int.to.chr$ -- 0
int.to.str$ -- 5
missing$ -- 0
newline$ -- 29
num.names$ -- 10
pop$ -- 40
preamble$ -- 1
purify$ -- 15
quote$ -- 0
skip$ -- 34
stack$ -- 0
substring$ -- 25
swap$ -- 5
text.length$ -- 0
text.prefix$ -- 0
top$ -- 0
type$ -- 20
warning$ -- 0
while$ -- 10
width$ -- 6
write$ -- 54

View File

@ -1,4 +1,4 @@
This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Debian) (preloaded format=pdflatex 2023.2.25) 4 APR 2023 22:01 This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Debian) (preloaded format=pdflatex 2023.2.25) 5 APR 2023 00:44
entering extended mode entering extended mode
restricted \write18 enabled. restricted \write18 enabled.
%&-line parsing enabled. %&-line parsing enabled.
@ -335,7 +335,7 @@ LaTeX Font Info: External font `cmex10' loaded for size
(Font) <8> on input line 36. (Font) <8> on input line 36.
LaTeX Font Info: External font `cmex10' loaded for size LaTeX Font Info: External font `cmex10' loaded for size
(Font) <6> on input line 36. (Font) <6> on input line 36.
<screenshot001.png, id=44, 733.23938pt x 640.64343pt> <screenshot001.png, id=92, 733.23938pt x 640.64343pt>
File: screenshot001.png Graphic file (type png) File: screenshot001.png Graphic file (type png)
<use screenshot001.png> <use screenshot001.png>
Package pdftex.def Info: screenshot001.png used on input line 36. Package pdftex.def Info: screenshot001.png used on input line 36.
@ -371,39 +371,105 @@ Chapter 1.
] [3] ] [3]
Chapter 2. Chapter 2.
Package hyperref Warning: Difference (2) between bookmark levels is greater
(hyperref) than one, level fixed on input line 75.
<screenshot002.png, id=92, 1275.26437pt x 776.14969pt>
File: screenshot002.png Graphic file (type png)
<use screenshot002.png>
Package pdftex.def Info: screenshot002.png used on input line 79.
(pdftex.def) Requested size: 517.5pt x 314.95474pt.
[4 [4
] [5 <./screenshot002.png (PNG copy)>] (./template_Report.aux) ] [5]
<screenshot002.png, id=174, 1275.26437pt x 776.14969pt>
File: screenshot002.png Graphic file (type png)
<use screenshot002.png>
Package pdftex.def Info: screenshot002.png used on input line 117.
(pdftex.def) Requested size: 552.0021pt x 335.95251pt.
[6]
<screenshot003.png, id=180, 1084.05pt x 562.35094pt>
File: screenshot003.png Graphic file (type png)
<use screenshot003.png>
Package pdftex.def Info: screenshot003.png used on input line 123.
(pdftex.def) Requested size: 552.0021pt x 286.35756pt.
[7 <./screenshot002.png (PNG copy)>]
<screenshot004.png, id=185, 1084.80281pt x 562.35094pt>
File: screenshot004.png Graphic file (type png)
<use screenshot004.png>
Package pdftex.def Info: screenshot004.png used on input line 130.
(pdftex.def) Requested size: 552.0021pt x 286.1602pt.
<screenshot005.png, id=186, 1085.55562pt x 563.10374pt>
File: screenshot005.png Graphic file (type png)
<use screenshot005.png>
Package pdftex.def Info: screenshot005.png used on input line 137.
(pdftex.def) Requested size: 552.0021pt x 286.34566pt.
(./template_Report.bbl
[8 <./screenshot003.png (PNG copy)>] [9 <./screenshot004.png (PNG copy)>]
[10 <./screenshot005.png (PNG copy)>]
Underfull \hbox (badness 3954) in paragraph at lines 4--10
[]\OT1/cmr/m/n/10 Mike Bai-ley. OpenGL Com-pute Shaders. [][]$\OT1/cmtt/m/n/1
0 "https : / / web . engr .
[]
Underfull \hbox (badness 10000) in paragraph at lines 4--10
\OT1/cmtt/m/n/10 oregonstate . edu / []mjb / cs519 / Handouts / compute . shade
r . 2pp . pdf"$[][]\OT1/cmr/m/n/10 ,
[]
Underfull \hbox (badness 1107) in paragraph at lines 12--15
[]\OT1/cmr/m/n/10 Krnonos Group. OpenGL Ref-er-ence Man-ual. [][]$\OT1/cmtt/m
/n/10 "https : / / registry .
[]
Underfull \hbox (badness 10000) in paragraph at lines 17--22
[]\OT1/cmr/m/n/10 JeGX. Par-ti-cle Bill-board-ing with the Ge-om-e-
[]
Underfull \hbox (badness 10000) in paragraph at lines 17--22
\OT1/cmr/m/n/10 try Shader. [][]$\OT1/cmtt/m/n/10 "https : / / www . geeks3d .
com / 20140815 /
[]
Underfull \hbox (badness 10000) in paragraph at lines 24--29
\OT1/cmtt/m/n/10 / / web . archive . org / web / 20160722164341 / http : / / am
d-[]dev . wpengine .
[]
Underfull \hbox (badness 10000) in paragraph at lines 24--29
\OT1/cmtt/m/n/10 netdna-[]cdn . com / wordpress / media / 2012 / 10 / ATI _ Rad
eon _ HD _ 2000 _
[]
Underfull \hbox (badness 10000) in paragraph at lines 31--36
\OT1/cmtt/m/n/10 ubm-[]twvideo01 . s3 . amazonaws . com / o1 / vault / GDC2014
/ Presentations /
[]
) [11
] (./template_Report.aux)
Package rerunfilecheck Info: File `template_Report.out' has not changed. Package rerunfilecheck Info: File `template_Report.out' has not changed.
(rerunfilecheck) Checksum: CB123BBB1A862490A8F40DBEA1059868;1109. (rerunfilecheck) Checksum: 07FA6F01F76508B7028734DCD8C73298;2423.
) )
Here is how much of TeX's memory you used: Here is how much of TeX's memory you used:
10472 strings out of 477975 10301 strings out of 477975
162768 string characters out of 5839281 162272 string characters out of 5839281
1856330 words of memory out of 5000000 1858330 words of memory out of 5000000
30472 multiletter control sequences out of 15000+600000 30270 multiletter control sequences out of 15000+600000
517483 words of font info for 49 fonts, out of 8000000 for 9000 517651 words of font info for 50 fonts, out of 8000000 for 9000
59 hyphenation exceptions out of 8191 59 hyphenation exceptions out of 8191
75i,6n,76p,530b,1101s stack positions out of 10000i,1000n,20000p,200000b,200000s 75i,6n,76p,1346b,460s stack positions out of 10000i,1000n,20000p,200000b,200000s
</home/brett/.texlive2022/texmf-var/fonts/pk/ljfour/jknappen/ec/tcrm1000.600 </home/brett/.texlive2022/texmf-var/fonts/pk/ljfour/jknappen/ec/tcrm1000.600
pk></usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx10.pfb></u pk></usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx10.pfb></u
sr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx12.pfb></usr/sha sr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx12.pfb></usr/sha
re/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr10.pfb></usr/share/texl re/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr10.pfb></usr/share/texl
ive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr12.pfb></usr/share/texlive/tex ive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr12.pfb></usr/share/texlive/tex
mf-dist/fonts/type1/public/amsfonts/cm/cmr17.pfb> mf-dist/fonts/type1/public/amsfonts/cm/cmr17.pfb></usr/share/texlive/texmf-dist
Output written on template_Report.pdf (7 pages, 1742942 bytes). /fonts/type1/public/amsfonts/cm/cmtt10.pfb>
Output written on template_Report.pdf (13 pages, 4340620 bytes).
PDF statistics: PDF statistics:
139 PDF objects out of 1000 (max. 8388607) 268 PDF objects out of 1000 (max. 8388607)
113 compressed objects within 2 object streams 231 compressed objects within 3 object streams
24 named destinations out of 1000 (max. 500000) 51 named destinations out of 1000 (max. 500000)
91 words of extra memory for PDF output out of 10000 (max. 10000000) 202 words of extra memory for PDF output out of 10000 (max. 10000000)

View File

@ -7,4 +7,16 @@
\BOOKMARK [2][-]{subsection.1.2.2}{\376\377\000B\000u\000i\000l\000d\000\040\000C\000o\000m\000m\000a\000n\000d\000s}{section.1.2}% 7 \BOOKMARK [2][-]{subsection.1.2.2}{\376\377\000B\000u\000i\000l\000d\000\040\000C\000o\000m\000m\000a\000n\000d\000s}{section.1.2}% 7
\BOOKMARK [1][-]{section.1.3}{\376\377\000U\000s\000a\000g\000e}{chapter.1}% 8 \BOOKMARK [1][-]{section.1.3}{\376\377\000U\000s\000a\000g\000e}{chapter.1}% 8
\BOOKMARK [0][-]{chapter.2}{\376\377\000P\000e\000r\000f\000o\000r\000m\000a\000n\000c\000e\000\040\000M\000o\000d\000e}{}% 9 \BOOKMARK [0][-]{chapter.2}{\376\377\000P\000e\000r\000f\000o\000r\000m\000a\000n\000c\000e\000\040\000M\000o\000d\000e}{}% 9
\BOOKMARK [1][-]{subsection.2.0.1}{\376\377\000D\000e\000s\000i\000g\000n}{chapter.2}% 10 \BOOKMARK [1][-]{section.2.1}{\376\377\000D\000e\000s\000i\000g\000n}{chapter.2}% 10
\BOOKMARK [1][-]{section.2.2}{\376\377\000R\000e\000n\000d\000e\000r\000e\000r}{chapter.2}% 11
\BOOKMARK [2][-]{subsection.2.2.1}{\376\377\000R\000e\000n\000d\000e\000r\000i\000n\000g\000\040\000P\000i\000p\000e\000l\000i\000n\000e}{section.2.2}% 12
\BOOKMARK [1][-]{section.2.3}{\376\377\000C\000o\000m\000p\000u\000t\000e\000\040\000S\000h\000a\000d\000e\000r}{chapter.2}% 13
\BOOKMARK [1][-]{section.2.4}{\376\377\000U\000s\000a\000g\000e}{chapter.2}% 14
\BOOKMARK [2][-]{subsection.2.4.1}{\376\377\000B\000u\000i\000l\000d\000i\000n\000g}{section.2.4}% 15
\BOOKMARK [2][-]{subsection.2.4.2}{\376\377\000R\000u\000n\000n\000i\000n\000g}{section.2.4}% 16
\BOOKMARK [1][-]{section.2.5}{\376\377\000F\000u\000t\000u\000r\000e\000\040\000P\000l\000a\000n\000s}{chapter.2}% 17
\BOOKMARK [2][-]{subsection.2.5.1}{\376\377\000L\000i\000s\000t\000s}{section.2.5}% 18
\BOOKMARK [2][-]{subsection.2.5.2}{\376\377\000B\000i\000t\000o\000n\000i\000c\000\040\000S\000o\000r\000t}{section.2.5}% 19
\BOOKMARK [2][-]{subsection.2.5.3}{\376\377\000T\000i\000l\000i\000n\000g}{section.2.5}% 20
\BOOKMARK [2][-]{subsection.2.5.4}{\376\377\000O\000c\000c\000l\000u\000s\000i\000o\000n\000\040\000Q\000u\000e\000r\000i\000e\000s}{section.2.5}% 21
\BOOKMARK [1][-]{section.2.6}{\376\377\000F\000i\000g\000u\000r\000e\000s}{chapter.2}% 22

Binary file not shown.

Binary file not shown.

View File

@ -60,26 +60,90 @@ The assignment makes use of a non-standard OpenGL extention during texture loadi
\subsection{Build Commands} \subsection{Build Commands}
\begin{figure}[H] \begin{figure}[H]
\centering \centering
\begin{lstlisting} \begin{verbatim}
mkdir build && cd build mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Release ../ cmake -DCMAKE_BUILD_TYPE=Release ../
make -j 16 make -j 16
./assign3 ./assign3
\end{lstlisting} \end{verbatim}
\caption{Linux build commands.} \caption{Linux build commands.}
\end{figure} \end{figure}
\section{Usage} \section{Usage}
Keybindings and usage instructions are printed at program startup. Keybindings and usage instructions are printed at program startup.
\chapter{Performance Mode}\label{chap:hp} \chapter{Performance Mode}\label{chap:hp}
\subsection{Design} \section{Design}
The high performance mode is the result of a weekend hack-a-ton where I wanted to see how easy it would be to implement a million particle+ renderer. The rendering engine itself can handle around 20 million particles at about 60fps (\autoref{fig:screenshot002}). The high performance mode is the result of a weekend hack-a-ton where I wanted to see how easy it would be to implement a million particle+ renderer. If I had more time I would encapsulate the high\_perf.cpp file into the particle system class, allowing for multiple \emph{and customizable} particle systems. If you wish to change settings, most are constants in shaders/physics.comp or high\_perf/high\_perf.cpp. The rendering engine itself can handle around 20 million particles at about 60fps (\autoref{fig:screenshot002}). With physics enabled, the engine can handle about 6 million particles (\autoref{fig:phyiscsrend}) but as \autoref{fig:phyiscsrendfill} shows, the renderer is clearly fillrate limited. Solutions to increase the number of rendered particles are discussed in \autoref{sec:fp}. It should be noted that (\autoref{fig:screenshot002}) used a previous renderer which made use of an instanced "GL\_TRIANGLES" approach and did not have textures or billboarding. The new renderer (\autoref{fig:newrender}) makes use of "GL\_POINTS" with a geometry shader to generate the vertices and features billboarding/texturing. A compute shader is used before rendering to update the particle positions and directions on the GPU. This way there is no need to copy the data to and from the graphics card.
\section{Renderer}
The legacy OpenGL renderer uses display lists to speed up rendering of the particles. Although this method is faster than using the same draw commands inline, it is highly limited by driver overhead. Modern GPUs are designed to process massive amounts of data all at once and benefit from reducing the amount of synchronization between the GPU and CPU. As mentioned earlier, the current renderer uses a vertex buffer object to store all particle positions and directions in one giant array. It then uses those points to render all particles in a single draw call, thereby reducing driver overhead.
\subsection{Rendering Pipeline}
\subsubsection{Vertex Shader}
The vertex shader is purely used to pass through the particle position to the geometry shader. Since the vertex shader does not have the ability to output multiple vertices (not easily at least), we have to use a geometry shader.
\subsubsection{Geometry Shader}
The geometry shader uses the up and right vectors from the inverse view matrix to generate a quad facing the camera. It takes in the particle position and outputs a triangle strip. This is a highly efficient operation as, according to AMD, there is dedicated hardware to handle this particular geometry shader case~\cite[p.~9]{amdprogram}.
\subsubsection{Fragment Shader}
The fragment shader is run once per pixel and is responsible for texturing the particles. I use a texture array as it can be bound once before rendering, therefore particles do not need to be separated by texture. Using an array has the downside that every texture needs to be the same size; to solve this I resize each texture as it is loaded. Unfortunately this will lead to some textures being distorted but the performance gain is worth it. The modern renderer is constrained by the lack of 'advanced' programming techniques, some of which are discussed in \autoref{sec:fp}.
\section{Compute Shader}
Compute shaders are very useful for embarrassingly parallel tasks like updating particles. The compute shader is a very simple (nearly 1:1) translation of the CPU version of the particle system's update function. It handles 'dead' particles by resetting them to the initial position / direction. As a result particles are initialized with a random lifetime between 0 and the max lifetime to ensure even distribution. If you change the particle lifetime, please modify both constants!
\subsubsection{Direction Offsetting}
Because generating random numbers on the GPU is hard (there is no dedicated hardware random number generator), I generate a random set of offsets at startup and upload these randoms to the GPU. The particle index is then used to access this buffer when the particle is reset; the result is a convincing distribution of particles. The larger the number of particles, the larger the offset buffer should be. For up to 6 million particles, 8192 offsets should be fine. If things look off consider increasing the value to some larger power of two. Make sure you update both constants here as well!
\section{Usage}
\subsection{Building}
Add "-DEXTRAS=ON" to the CMake command.
\begin{figure}[H] \begin{figure}[H]
\centerline{\includegraphics[width=1.5\linewidth]{screenshot002}} \centering
\begin{verbatim}
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Release -DEXTRAS=ON ../
make -j 16
./assign3
\end{verbatim}
\caption{Linux build commands.}
\end{figure}
\subsection{Running}
All particles exist from the beginning, which means all particles start at the initial position and slowly spread out. After starting the program but before moving around, you should press 'p' to allow the compute shader to run; once the particles spread out it is safe to move. The slow performance of all the particles in the same spot has to do with overdraw accessing and writing the same location of the depth texture (hard to do in parallel). Fillrate is a common issue with this particle renderer. See the future plans section for possible resolutions.
\section{Future Plans}\label{sec:fp}
Unfortunately because this is exam season, I do not have time to do anything more with this assignment. Furthermore I do not think the effort I've put in so far will be reflected in the value of the mark and any further improvements would be a waste. Below is a list of features I began looking into, but as I have no experience with them, they would require far too much experimentation and research to implement myself in a reasonable amount of time. As it is, it took a weekend to implement something I was already somewhat familiar with.
\subsection{Lists}
I would like to make it so all particles are not rendered all the time. Basically add a dead / alive particles list. This would prevent the issue of all particles starting in the same place and the low performance that causes, and would be helpful in sorting.
\subsection{Bitonic Sort}
\subsection{Tiling}
\subsection{Occlusion Queries}
\section{Figures}
\begin{figure}[H]
\centerline{\includegraphics[width=1.6\linewidth]{screenshot002}}
\caption[]{20 million particles distributed in a 50x25x50 cube with load monitors} \caption[]{20 million particles distributed in a 50x25x50 cube with load monitors}
\label{fig:screenshot002} \label{fig:screenshot002}
\end{figure} \end{figure}
\begin{figure}[H]
\centerline{\includegraphics[width=1.6\linewidth]{screenshot003}}
\caption{20 million particles on the new renderer}
\label{fig:newrender}
\end{figure}
\begin{figure}
\centering
\centerline{\includegraphics[width=1.6\linewidth]{screenshot004}}
\caption{6.4 million particles, fillrate (not compute) limited.}
\label{fig:phyiscsrend}
\end{figure}
\begin{figure}
\centering
\centerline{\includegraphics[width=1.6\linewidth]{screenshot005}}
\caption{6.4 million particles, zoomed out, showing fillrate as the limiting factor in speed.}
\label{fig:phyiscsrendfill}
\end{figure}
\nocite{*}
\bibliographystyle{plain}
\bibliography{references.bib}
\end{document} \end{document}

View File

@ -7,4 +7,20 @@
\contentsline {subsection}{\numberline {1.2.2}Build Commands}{3}{subsection.1.2.2}% \contentsline {subsection}{\numberline {1.2.2}Build Commands}{3}{subsection.1.2.2}%
\contentsline {section}{\numberline {1.3}Usage}{3}{section.1.3}% \contentsline {section}{\numberline {1.3}Usage}{3}{section.1.3}%
\contentsline {chapter}{\numberline {2}Performance Mode}{4}{chapter.2}% \contentsline {chapter}{\numberline {2}Performance Mode}{4}{chapter.2}%
\contentsline {subsection}{\numberline {2.0.1}Design}{4}{subsection.2.0.1}% \contentsline {section}{\numberline {2.1}Design}{4}{section.2.1}%
\contentsline {section}{\numberline {2.2}Renderer}{4}{section.2.2}%
\contentsline {subsection}{\numberline {2.2.1}Rendering Pipeline}{5}{subsection.2.2.1}%
\contentsline {subsubsection}{Vertex Shader}{5}{subsubsection*.2}%
\contentsline {subsubsection}{Geometry Shader}{5}{subsubsection*.3}%
\contentsline {subsubsection}{Fragment Shader}{5}{subsubsection*.4}%
\contentsline {section}{\numberline {2.3}Compute Shader}{5}{section.2.3}%
\contentsline {subsubsection}{Direction Offseting}{5}{subsubsection*.5}%
\contentsline {section}{\numberline {2.4}Usage}{6}{section.2.4}%
\contentsline {subsection}{\numberline {2.4.1}Building}{6}{subsection.2.4.1}%
\contentsline {subsection}{\numberline {2.4.2}Running}{6}{subsection.2.4.2}%
\contentsline {section}{\numberline {2.5}Future Plans}{6}{section.2.5}%
\contentsline {subsection}{\numberline {2.5.1}Lists}{6}{subsection.2.5.1}%
\contentsline {subsection}{\numberline {2.5.2}Bitonic Sort}{7}{subsection.2.5.2}%
\contentsline {subsection}{\numberline {2.5.3}Tiling}{7}{subsection.2.5.3}%
\contentsline {subsection}{\numberline {2.5.4}Occlusion Queries}{7}{subsection.2.5.4}%
\contentsline {section}{\numberline {2.6}Figures}{7}{section.2.6}%

View File

@ -45,13 +45,16 @@ GLuint particleVAO;
// -------{Textures}------- // -------{Textures}-------
GLuint textureArrayID; GLuint textureArrayID;
// must make sure the texture list contains this number of textures otherwise weird errors will occur!
// !*might not crash*!
const unsigned int TEXTURE_COUNT = 10; const unsigned int TEXTURE_COUNT = 10;
const unsigned int TEXTURE_WIDTH = 512; const unsigned int TEXTURE_WIDTH = 512;
const unsigned int TEXTURE_HEIGHT = 512; const unsigned int TEXTURE_HEIGHT = 512;
// -------{Particles}------- // -------{Particles}-------
const unsigned int particle_count = 128 * 50000; const unsigned int particle_count = 128 * 5000; // must be a multiple of group size divisor!
const unsigned int offset_count = 8192; const unsigned int offset_count = 8192;
const float particle_lifetime = 25.0f;
// generally alignment to multiples of 4 floats helps performance, plus we can use that extra space for info we need. // generally alignment to multiples of 4 floats helps performance, plus we can use that extra space for info we need.
typedef struct { typedef struct {
@ -149,7 +152,7 @@ void init() {
blt::scoped_buffer<particle_record> translations{particle_count}; blt::scoped_buffer<particle_record> translations{particle_count};
blt::scoped_buffer<vec4> offsets{offset_count}; blt::scoped_buffer<vec4> offsets{offset_count};
blt::random<float> dir{-1, 1}; blt::random<float> dir{-1, 1};
blt::random<float> lifetime{0, 25}; blt::random<float> lifetime{0, particle_lifetime};
BLT_TRACE("Creating particles"); BLT_TRACE("Creating particles");
for (int i = 0; i < particle_count; i++) for (int i = 0; i < particle_count; i++)