Scripts/gl_doc_generator.py

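# Scrapes an OpenGL reference page (e.g. from the Khronos OpenGL-Refpages
# registry) and prints a Doxygen-style /** ... */ comment block to stdout.
#
#   python gl_doc_generator.py <refpage-url>
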
from bs4 import BeautifulSoup
import requests
import re
import sys

def open_url(url):
    return BeautifulSoup(requests.get(url).content, "html.parser")
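
# get_text() wrapper that tolerates a missing element.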
def extract(o):
    if o is None:
        return ""
    return o.get_text()
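
# Rewrites every <old_tag_str> under `element` as <new_tag_str>, keeping only
# its text. Relies on the module-level `soup` set in process_page() to build
# the replacement tags.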
def replace_tag(element, old_tag_str, new_tag_str, **kwargs):
    for tag in element.find_all(old_tag_str, **kwargs):
        new_tag = soup.new_tag(new_tag_str)
        new_tag.string = tag.text
        tag.replace_with(new_tag)
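
# Flattens each <p> into a single line of text: <code> spans become <b>, and
# the whitespace introduced by the page's pretty-printed HTML is collapsed.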
def process_paragraphs(p_tags):
    paragraphs = []
    for p in p_tags:
        replace_tag(p, "code", "b")
        text = p.get_text()
        text = re.sub(r"\n\s+", " ", text)
        text = text.replace("\n", "")
        if text.isspace() or len(text) == 0:
            continue
        paragraphs.append(text.strip())
    return paragraphs
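
# Greedy wrap: roughly every `width` characters the line is broken at the
# next space. The first output line is prefixed with `first_str`,
# continuation lines with `padding_str` (so @param text stays aligned).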
def limit_width(line, padding_str="", first_str=None, width=120):
    if first_str is None:
        first_str = padding_str
    results = []
    line_str = ""
    is_first = True
    needs_new_line = False
    for i, c in enumerate(line):
        if i % width == width - 1:
            needs_new_line = True
        if needs_new_line and c == ' ':
            if is_first:
                results.append(first_str + line_str)
                is_first = False
            else:
                results.append(padding_str + line_str)
            line_str = ""
            needs_new_line = False
        else:
            line_str += c
    if is_first:
        results.append(first_str + line_str)
    else:
        results.append(padding_str + line_str)
    return results
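
# Assumption: on the Khronos refpages the name paragraph reads
# "glFoo <em dash> one-line summary", so the text is split on U+2014.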
def process_name(div):
    v = extract(div.find("p")).split('\u2014')
    if len(v) == 1:
        return "@name " + v[0]
    result = "@name " + v[0] + "-"
    for i in range(1, len(v)):
        result += v[i]
    return result

def process_spec(div):
    tables = div.find_all("table", class_="funcprototype-table")
    funcs = []
    for table in tables:
        func = ""
        rows = table.find_all("tr")
        for row in rows:
            datas = row.find_all("td")
            for data in datas:
                text = data.get_text()
                if text is None or text.isspace():
                    continue
                func += text.replace("\n", "").replace("\r", "")
        funcs.append("@code " + func + " @endcode")
    return funcs
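
# The parameters section is a definition list: its direct children alternate
# between a term (the parameter name) and its definition, so consecutive
# pairs are zipped into (name, description).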
def process_params(div):
    if div is None:
        return []
    parent = div.find("div", class_="variablelist").find(class_="variablelist")
    items = parent.find_all(recursive=False)
    params = []
    for p, d in zip(items[::2], items[1::2]):
        param = p.get_text().replace("\n", "")
        d = d.find()
        replace_tag(d, "code", "b")
        line = ""
        for content in d.contents:
            line += re.sub(r"\n\s+", " ", str(content))
        # Ensure a space follows '.' and ',' so the word wrap can break there.
        line_copy = ""
        for i, c in enumerate(line):
            line_copy += c
            if (c == '.' or c == ',') and (i + 1 < len(line)):
                if line[i + 1] != ' ':
                    line_copy += ' '
        line = line_copy.strip()
        param_str = "@param " + param + " "
        padding_str = ' ' * len(param_str)
        params.extend(limit_width(line, padding_str, param_str))
    return params
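
# Emits the description paragraphs, tab-indented and wrapped, with each
# paragraph's last line terminated by <br> to preserve the paragraph breaks.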
def process_desc(div):
    if div is None:
        return []
    pg = process_paragraphs(div.find_all('p'))
    results = []
    for f in pg:
        results.extend(limit_width("\t" + f))
        results[-1] += "<br>"
    return results
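
# process_notes, process_errors and process_seealso follow the same pattern:
# pull the section's paragraphs and wrap them under a @note/@errors/@see tag.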
def process_notes(div):
    if div is None:
        return []
    pg = process_paragraphs(div.find_all('p'))
    results = []
    param = "@note "
    padd = ' ' * len(param)
    for f in pg:
        results.extend(limit_width(f, padd, param))
    return results

def process_errors(div):
    if div is None:
        return []
    pg = process_paragraphs(div.find_all('p'))
    results = []
    param = "@errors "
    padd = ' ' * len(param)
    for f in pg:
        results.extend(limit_width(f, padd, param))
    return results

def process_seealso(div):
    if div is None:
        return []
    anchors = div.find('p').find_all('a')
    alsos = []
    for a in anchors:
        alsos.append("@see " + a.get_text())
    return alsos
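
# Fetches one reference page, locates the standard refpage sections by class
# or id, and prints the assembled Doxygen comment block to stdout.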
def process_page(url):
    global soup
    soup = open_url(url)
    ref = soup.find("div", class_="refentry")
    if ref is None:
        print("NULL")
        return

    name_div = ref.find("div", class_="refnamediv")
    cspec_div = ref.find("div", class_="refsynopsisdiv").find("div", class_="funcsynopsis")
    parameters_div = ref.find("div", id="parameters")
    description_div = ref.find("div", id="description")
    notes_div = ref.find("div", id="notes")
    associatedgets_div = ref.find("div", id="associatedgets")  # collected but not emitted below
    errors_div = ref.find("div", id="errors")
    seealso_div = ref.find("div", id="seealso")
    print("/**")
    print(" * " + process_name(name_div))
    print(" * @usage")
    for f in process_spec(cspec_div):
        print(" * " + f)
    for f in process_params(parameters_div):
        print(" * " + f)
    print(" * @description")
    for f in process_desc(description_div):
        print(" * " + f)
    print(" * ")
    for f in process_notes(notes_div):
        print(" * " + f)
    print(" * ")
    for f in process_errors(errors_div):
        print(" * " + f)
    print(" * ")
    for f in process_seealso(seealso_div):
        print(" * " + f)
    print(" */")

# process_page("https://registry.khronos.org/OpenGL-Refpages/gl4/html/glClearColor.xhtml")
# process_page("https://registry.khronos.org/OpenGL-Refpages/gl4/html/glBlendEquationSeparate.xhtml")
if len(sys.argv) > 1:
    process_page(sys.argv[1])
else:
    print("Please provide the site to load")