@@ -293,6 +293,26 @@ def parse_description(content_div):
293293
294294 return the_description
295295
def parse_issues(content_div):
    """Collect GitHub issue references linked from a wiki page.

    Every ``<a href=...>`` found in *content_div* is inspected for a link to
    the ``multitheftauto/mtasa-blue`` issue tracker. Each matching link is
    paired with the text of the next ``<td>`` that follows it.

    Args:
        content_div: Parsed element exposing ``find_all``. The anchors it
            yields must support ``["href"]``, ``find_parent`` and
            ``find_next``.

    Returns:
        A list of ``{"id": str, "description": str}`` dicts in the order the
        links were found. ``description`` is ``"TODO"`` when no ``<td>``
        follows the link.
    """
    import re

    # Capture only the numeric id so trailing slashes, "#issuecomment-..."
    # fragments and query strings do not leak into the id, and so that
    # non-issue paths such as ".../issues/new" are not reported at all.
    issue_link = re.compile(
        r"github\.com/multitheftauto/mtasa-blue/issues/(\d+)"
    )

    issues = []
    for anchor in content_div.find_all("a", href=True):
        # Links inside a "note-messagebox" div are skipped. The parent lookup
        # may find no div at all, so guard against None before reading it.
        parent_div = anchor.find_parent("div")
        if parent_div is not None and "note-messagebox" in (
            parent_div.get("class", []) or []
        ):
            continue

        match = issue_link.search(anchor["href"])
        if match is None:
            continue

        # The description is taken from the next table cell after the link.
        cell = anchor.find_next("td")
        description = cell.get_text(strip=True) if cell is not None else "TODO"
        issues.append({
            "id": match.group(1),
            "description": description,
        })
    return issues
315+
296316def get_page_from_cache_or_fetch (page_url : str , page_name : str ) -> str :
297317 """Get the page content from cache or fetch it if not cached."""
298318 cache_file = os .path .join (PAGES_CACHE_DIR , f"{ page_name } .html" )
@@ -309,6 +329,31 @@ def get_page_from_cache_or_fetch(page_url: str, page_name: str) -> str:
309329 else :
310330 raise ValueError (f"Failed to fetch { page_url } : { response .status_code } " )
311331
def print_additional_headers_found_in_page(content_div, handled_header_names, page_url):
    """Print any ``<h2>``/``<h3>`` headers that were not handled by a parser.

    A header is reported unless its text is empty, matches one of
    *handled_header_names* (compared case-insensitively), or contains one of
    the ignore words below. Nothing is printed when no header is left over.

    Args:
        content_div: Parsed element exposing ``find_all``; each returned
            header must support ``get_text(strip=True)``.
        handled_header_names: Header titles already consumed by the caller.
        page_url: URL of the page, used only in the printed message.
    """
    # Lower-case substrings; a header containing any of them is ignored.
    # These cover "See also" and the navigation/category headers.
    ignore_words = (
        "see also", "events", "functions", "changelog",
        "game processing order", "input", "gui",
        "browsers", "buttons", "checkboxes", "comboboxes",
        "edit boxes", "gridlists", "memos", "progressbars", "radio buttons",
        "scrollbars", "scrollpanes", "static images", "tabs",
        "tab panels", "text labels", "windows",
    )

    # Compare handled names case-insensitively so that "Required Arguments"
    # and "Required arguments" count as the same header, consistent with the
    # case-insensitive ignore-word check.
    handled = {name.lower() for name in handled_header_names}

    additional_headers = []
    for header in content_div.find_all(["h2", "h3"]):
        header_text = header.get_text(strip=True)
        if not header_text:
            continue
        header_text_lower = header_text.lower()
        if header_text_lower in handled:
            continue
        if any(word in header_text_lower for word in ignore_words):
            continue
        additional_headers.append(header_text)

    if additional_headers:
        print(f"Other headers found in {page_url}:")
        print(f" {', '.join(additional_headers)}")
356+
312357def parse_event_page (page_url : str , category : str , name : str , source : str ) -> dict :
313358 response_text = get_page_from_cache_or_fetch (page_url , name )
314359
@@ -318,6 +363,10 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
318363 content_div = soup .find ("div" , id = "mw-content-text" )
319364 if not content_div :
320365 raise ValueError (f"Could not find content in { page_url } " )
366+
367+ stop_if_deprecated (content_div , page_url )
368+
369+ handled_header_names = []
321370
322371 event_type = "client" if "Client" in source else "server"
323372
@@ -330,6 +379,7 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
330379 parameters_header = content_div .find ("span" , id = "Parameters" )
331380
332381 if parameters_header :
382+ handled_header_names .append ("Parameters" )
333383 params = []
334384 next_element = parameters_header .find_next ()
335385
@@ -393,6 +443,7 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
393443 event_source = None
394444 source_header = content_div .find ("span" , id = "Source" )
395445 if source_header :
446+ handled_header_names .append ("Source" )
396447 source_paragraph = source_header .find_next ("p" )
397448 if source_paragraph :
398449 source_text = source_paragraph .get_text ().strip ()
@@ -409,6 +460,7 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
409460 event_canceling = None
410461 canceling_header = content_div .find ("span" , id = "Canceling" ) or content_div .find ("span" , id = "Cancelling" ) or content_div .find ("span" , id = "Cancel_effect" ) or content_div .find ("span" , id = "Cancel_effects" ) or content_div .find ("span" , id = "Cancel_Effect" ) or content_div .find ("span" , id = "Cancel_Effects" )
411462 if canceling_header :
463+ handled_header_names .append (canceling_header .text .strip ())
412464 # Extract text
413465 canceling_paragraph = canceling_header .find_next ("p" )
414466 if canceling_paragraph :
@@ -419,11 +471,11 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
419471
420472 # Examples
421473 examples = parse_examples (content_div )
474+ handled_header_names .append ("Examples" )
475+ handled_header_names .append ("Example" )
422476 if len (examples ) == 0 :
423477 print (f"Event is missing code examples: { page_url } " )
424478
425- # For each example, create a .lua file with the code
426- # with name eventName-index.lua
427479 example_index = 1
428480 added_examples = []
429481 for example in examples :
@@ -444,6 +496,35 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
444496
445497 event_notes , event_meta = parse_notes (content_div )
446498
499+ # Parse Type section, put it into a note
500+ type_header = content_div .find ("span" , id = "Type" )
501+ if type_header :
502+ type_paragraph = type_header .find_next ("p" )
503+ if type_paragraph :
504+ type_text = type_paragraph .get_text ().strip ()
505+ if type_text :
506+ # Remove new lines from the type text
507+ type_text = type_text .replace ("\n " , " " )
508+ # Look for any list after that paragraph
509+ list_items = type_paragraph .find_next ("ul" )
510+ if list_items :
511+ prev_header = type_paragraph .find_previous ("h2" ) or type_paragraph .find_previous ("h3" )
512+ if prev_header and prev_header .getText (strip = True ) == "Type" :
513+ # If the header is "Type", we can safely add the list items to the type text
514+ type_text += " " + ", " .join (li .get_text (strip = True ) for li in list_items .find_all ("li" ))
515+
516+ event_notes .append ({
517+ "type" : "info" ,
518+ "content" : type_text
519+ })
520+ handled_header_names .append ("Type" )
521+
522+ # Parse Issues
523+ event_issues = parse_issues (content_div )
524+ handled_header_names .append ("Issues" )
525+
526+ print_additional_headers_found_in_page (content_div , handled_header_names , page_url )
527+
447528 yaml_dict = {
448529 "name" : name ,
449530 "type" : event_type ,
@@ -458,38 +539,72 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
458539 yaml_dict ["notes" ] = event_notes
459540 if event_meta :
460541 yaml_dict ["meta" ] = event_meta
542+ if event_issues :
543+ yaml_dict ["issues" ] = event_issues
461544
462545 # Set incomplete to true if no description is found for at least one parameter
463546 if any (param ["description" ] == "MISSING_PARAM_DESC" for param in event_parameters ):
464547 yaml_dict ["incomplete" ] = True
465548
466549 return yaml_dict
467550
def stop_if_deprecated(content_div, page_url: str):
    """Raise if the page contains one of the known "do not parse" phrases.

    The phrases are checked in the order listed and the first one found in
    any text node of *content_div* aborts processing.

    Args:
        content_div: Parsed element exposing ``find(string=callable)``.
        page_url: URL of the page, included in the error message.

    Raises:
        ValueError: When one of the phrases occurs in the page text.
    """
    banner_phrases = (
        "This function is deprecated",
        "Function has been disabled",
        "This function is provided by the external",
        "This page is marked for deletion",
        # "BEFORE VERSION",
    )

    # Lazy search: evaluation stops at the first phrase the page contains.
    # The phrase is bound as a default argument so each predicate keeps its
    # own value.
    found = next(
        (
            phrase
            for phrase in banner_phrases
            if content_div.find(
                string=lambda node, needle=phrase: node and needle in node
            )
        ),
        None,
    )
    if found is not None:
        raise ValueError(f"Found {found} in {page_url}. Please review manually.")
562+
468563def parse_function_page (page_url : str , category : str , name : str , source : str ) -> dict :
469564 response_text = get_page_from_cache_or_fetch (page_url , name )
470565
471566 soup = BeautifulSoup (response_text , "html.parser" )
472567 content_div = soup .find ("div" , id = "mw-content-text" )
473568 if not content_div :
474569 raise ValueError (f"Could not find content in { page_url } " )
570+
571+ stop_if_deprecated (content_div , page_url )
572+
573+ handled_header_names = []
475574
476575 func_type = "shared" if "Shared" in source else "server" if "Server" in source else "client"
477576
478577 func_description = parse_description (content_div )
479578 if func_description is None :
480579 raise ValueError (f"Could not find a valid description for { name } in { page_url } " )
481580
482- func_notes , func_meta = parse_notes (content_div )
581+ func_pair = None
582+ counterpart_b = content_div .find ("b" , string = "Counterpart" )
583+ if counterpart_b :
584+ i_tag = counterpart_b .find_next ("i" )
585+ if i_tag and i_tag .a :
586+ func_pair = i_tag .a .text .strip ()
483587
588+ func_notes , func_meta = parse_notes (content_div )
589+
590+ # Syntax: parameters and returns TODO
591+ handled_header_names .append ("Syntax" )
592+ handled_header_names .append ("Parameters" )
593+ handled_header_names .append ("Arguments" )
594+ handled_header_names .append ("Required Arguments" )
595+ handled_header_names .append ("Required arguments" )
596+ handled_header_names .append ("Optional Arguments" )
597+ handled_header_names .append ("Optional arguments" )
598+ handled_header_names .append ("Returns" )
599+
484600
485601 # Examples
486602 examples = parse_examples (content_div )
603+ handled_header_names .append ("Examples" )
604+ handled_header_names .append ("Example" )
487605 # if len(examples) == 0:
488606 # print(f"Function is missing code examples: {page_url}")
489607
490-
491- # For each example, create a .lua file with the code
492- # with name eventName-index.lua
493608 example_index = 1
494609 added_examples = []
495610 for example in examples :
@@ -508,35 +623,28 @@ def parse_function_page(page_url: str, category: str, name: str, source: str) ->
508623 })
509624 example_index += 1
510625
626+ # Parse Issues
627+ func_issues = parse_issues (content_div )
628+ handled_header_names .append ("Issues" )
629+
630+ print_additional_headers_found_in_page (content_div , handled_header_names , page_url )
511631
512632 yaml_dict = {
513633 func_type : {
514634 "name" : name ,
515635 "description" : func_description ,
516636 "parameters" : [],
517637 "examples" : added_examples ,
518- "notes" : func_notes ,
519- "meta" : func_meta
520638 }
521639 }
522-
523- # if source.startswith("Shared"):
524- # yaml_content = "shared: &shared\n"
525- # yaml_content += f" incomplete: true\n"
526- # yaml_content += f" name: {name}\n"
527- # yaml_content += f" description: TODO\n"
528- # yaml_content += "\nserver:\n <<: *shared"
529- # yaml_content += "\nclient:\n <<: *shared"
530- # elif source.startswith("Server"):
531- # yaml_content = "server:\n"
532- # yaml_content += f" incomplete: true\n"
533- # yaml_content += f" name: {name}\n"
534- # yaml_content += f" description: TODO\n"
535- # elif source.startswith("Client"):
536- # yaml_content = "client:\n"
537- # yaml_content += f" incomplete: true\n"
538- # yaml_content += f" name: {name}\n"
539- # yaml_content += f" description: TODO\n"
640+ if func_pair :
641+ yaml_dict [func_type ]["pair" ] = func_pair
642+ if func_notes :
643+ yaml_dict [func_type ]["notes" ] = func_notes
644+ if func_meta :
645+ yaml_dict [func_type ]["meta" ] = func_meta
646+ if func_issues :
647+ yaml_dict [func_type ]["issues" ] = func_issues
540648
541649 return yaml_dict
542650
@@ -599,8 +707,8 @@ def main():
599707
600708 # TEST Parse only these:
601709 # functions_by_source["Shared functions"] = {
602- # "Element ": [
603- # ("https://wiki.multitheftauto.com/wiki/SetElementParent ", "setElementParent "),
710+ # "Player ": [
711+ # ("https://wiki.multitheftauto.com/wiki/SetPlayerName ", "setPlayerName "),
604712 # ]
605713 # }
606714
0 commit comments