220 lines
5.9 KiB
Python
220 lines
5.9 KiB
Python
|
from bs4 import BeautifulSoup
|
||
|
import requests
|
||
|
import re
|
||
|
import sys
|
||
|
|
||
|
def open_url(url):
|
||
|
return BeautifulSoup(requests.get(url).content, "html.parser")
|
||
|
|
||
|
def extract(o):
|
||
|
if o == None:
|
||
|
return ""
|
||
|
else:
|
||
|
return o.get_text()
|
||
|
|
||
|
def replace_tag(element, old_tag_str, new_tag_str, **kwargs):
|
||
|
elements = element.find_all(old_tag_str, **kwargs)
|
||
|
for tag in elements:
|
||
|
new_tag = soup.new_tag(new_tag_str)
|
||
|
new_tag.string = tag.text
|
||
|
tag.replace_with(new_tag)
|
||
|
|
||
|
def process_paragraphs(p_tags):
|
||
|
paragraphs = []
|
||
|
for p in p_tags:
|
||
|
replace_tag(p, "code", 'b')
|
||
|
text = p.get_text()
|
||
|
text = re.sub("\n[\s]+", " ", text)
|
||
|
text = text.replace("\n", "")
|
||
|
if text.isspace() or len(text) == 0:
|
||
|
continue
|
||
|
paragraphs.append(text.strip())
|
||
|
return paragraphs
|
||
|
|
||
|
def limit_width(line, padding_str = "", first_str = None, width = 120):
|
||
|
if (first_str == None):
|
||
|
first_str = padding_str
|
||
|
results = []
|
||
|
line_str = ""
|
||
|
is_first = True
|
||
|
needs_new_line = False
|
||
|
for i, c in enumerate(line):
|
||
|
if (i % width == width - 1):
|
||
|
needs_new_line = True
|
||
|
if (needs_new_line and c == ' '):
|
||
|
if (is_first):
|
||
|
results.append(first_str + line_str)
|
||
|
is_first = False
|
||
|
else:
|
||
|
results.append(padding_str + line_str)
|
||
|
line_str = ""
|
||
|
needs_new_line = False
|
||
|
else:
|
||
|
line_str += c
|
||
|
if (is_first):
|
||
|
results.append(first_str + line_str)
|
||
|
else:
|
||
|
results.append(padding_str + line_str)
|
||
|
return results
|
||
|
|
||
|
def process_name(div):
|
||
|
v = extract(div.find("p")).split('—')
|
||
|
if len(v) == 1:
|
||
|
return "@name" + v[0]
|
||
|
str = "@name " + v[0] + "-"
|
||
|
for i in range(1, len(v)):
|
||
|
str += v[i]
|
||
|
return str
|
||
|
|
||
|
def process_spec(div):
|
||
|
tables = div.find_all("table", class_="funcprototype-table")
|
||
|
funcs = []
|
||
|
for table in tables:
|
||
|
func = ""
|
||
|
rows = table.find_all("tr")
|
||
|
for row in rows:
|
||
|
datas = row.find_all("td")
|
||
|
for data in datas:
|
||
|
text = data.get_text()
|
||
|
if text is None or text.isspace():
|
||
|
continue
|
||
|
func += text.replace("\n", "").replace("\r", "")
|
||
|
funcs.append("@code " + func + " @endcode")
|
||
|
return funcs
|
||
|
|
||
|
def process_params(div):
|
||
|
if div is None:
|
||
|
return []
|
||
|
parent = div.find("div", class_="variablelist").find(class_="variablelist")
|
||
|
list = parent.find_all(recursive=False)
|
||
|
|
||
|
params = []
|
||
|
for p,d in zip(list[::2], list[1::2]):
|
||
|
param = p.get_text().replace("\n", "")
|
||
|
d = d.find()
|
||
|
replace_tag(d, "code", 'b')
|
||
|
line = ""
|
||
|
for content in d.contents:
|
||
|
line += re.sub("\n[\s]+", " ", str(content))
|
||
|
|
||
|
line_copy = ""
|
||
|
for i, c in enumerate(line):
|
||
|
line_copy += c
|
||
|
if (c == '.' or c == ',' ) and (i + 1 < len(line)):
|
||
|
if line[i + 1] != ' ':
|
||
|
line_copy += ' '
|
||
|
line = line_copy.strip()
|
||
|
|
||
|
param_str = "@param " + param + " "
|
||
|
padding_str = ""
|
||
|
for i in range(len(param_str)):
|
||
|
padding_str += ' '
|
||
|
params.extend(limit_width(line, padding_str, param_str))
|
||
|
return params
|
||
|
|
||
|
def process_desc(div):
|
||
|
if div is None:
|
||
|
return []
|
||
|
div = div.find_all('p')
|
||
|
|
||
|
pg = process_paragraphs(div)
|
||
|
results = []
|
||
|
for f in pg:
|
||
|
results.extend(limit_width("\t" + f))
|
||
|
results[-1] += "<br>"
|
||
|
|
||
|
return results
|
||
|
|
||
|
def process_notes(div):
|
||
|
if div is None:
|
||
|
return []
|
||
|
div = div.find_all('p')
|
||
|
|
||
|
pg = process_paragraphs(div)
|
||
|
results = []
|
||
|
param = "@note "
|
||
|
padd = ""
|
||
|
for i in range(len(param)):
|
||
|
padd += " "
|
||
|
for f in pg:
|
||
|
results.extend(limit_width(f, padd, param))
|
||
|
|
||
|
return results
|
||
|
|
||
|
def process_errors(div):
|
||
|
if div is None:
|
||
|
return []
|
||
|
div = div.find_all('p')
|
||
|
|
||
|
pg = process_paragraphs(div)
|
||
|
results = []
|
||
|
param = "@errors "
|
||
|
padd = ""
|
||
|
for i in range(len(param)):
|
||
|
padd += " "
|
||
|
for f in pg:
|
||
|
results.extend(limit_width(f, padd, param))
|
||
|
|
||
|
return results
|
||
|
|
||
|
def process_seealso(div):
|
||
|
if div is None:
|
||
|
return []
|
||
|
div = div.find('p')
|
||
|
if div is None:
|
||
|
return []
|
||
|
div = div.find_all('a')
|
||
|
|
||
|
alsos = []
|
||
|
for a in div:
|
||
|
text = a.get_text()
|
||
|
alsos.append("@see " + text)
|
||
|
|
||
|
return alsos
|
||
|
|
||
|
def process_page(url):
|
||
|
global soup
|
||
|
soup = open_url(url)
|
||
|
ref = soup.find("div", class_="refentry")
|
||
|
|
||
|
if ref is None:
|
||
|
print("NULL")
|
||
|
return
|
||
|
|
||
|
name_div = ref.find("div", class_="refnamediv")
|
||
|
cspec_div = ref.find("div", class_="refsynopsisdiv").find("div", class_="funcsynopsis")
|
||
|
parameters_div = ref.find("div", id="parameters")
|
||
|
description_div = ref.find("div", id="description")
|
||
|
notes_div = ref.find("div", id="notes")
|
||
|
associatedgets_div = ref.find("div", id="associatedgets")
|
||
|
errors_div = ref.find("div", id="errors")
|
||
|
seealso_div = ref.find("div", id="seealso")
|
||
|
|
||
|
print("/**")
|
||
|
print(" * " + process_name(name_div))
|
||
|
print(" * @usage")
|
||
|
for f in process_spec(cspec_div):
|
||
|
print(" * " + f)
|
||
|
for f in process_params(parameters_div):
|
||
|
print(" * " + f)
|
||
|
print(" * @description")
|
||
|
for f in process_desc(description_div):
|
||
|
print(" * " + f);
|
||
|
print(" * ")
|
||
|
for f in process_notes(notes_div):
|
||
|
print(" * " + f)
|
||
|
print(" * ")
|
||
|
for f in process_errors(errors_div):
|
||
|
print(" * " + f)
|
||
|
print(" * ")
|
||
|
for f in process_seealso(seealso_div):
|
||
|
print(" * " + f)
|
||
|
print(" */")
|
||
|
|
||
|
# process_page("https://registry.khronos.org/OpenGL-Refpages/gl4/html/glClearColor.xhtml")
|
||
|
#process_page("https://registry.khronos.org/OpenGL-Refpages/gl4/html/glBlendEquationSeparate.xhtml")
|
||
|
|
||
|
if (len (sys.argv) > 1):
|
||
|
process_page(sys.argv[1])
|
||
|
else:
|
||
|
print("Please provide the site to load")
|