2222 'Collision_shape' : 'Colshape' ,
2323}
2424
# Link names containing any of these substrings are skipped by parse_links.
NAME_BLACKLIST = ["Matrix", "Vector"]
29+
2530def clean_category (category_name : str ) -> str :
2631 if category_name .endswith ("events" ):
2732 return category_name [:- 7 ]
@@ -61,6 +66,8 @@ def parse_links(source_label: str, url: str) -> dict:
6166 if a and a .get ("href" , "" ).startswith ("/wiki/" ):
6267 name = a .text .strip ()
6368 name = name .replace ("/wiki/" , "" ).split ("/" )[- 1 ]
69+ if any (blacklist in name for blacklist in NAME_BLACKLIST ):
70+ continue
6471 page_url = a .get ("href" )
6572 page_url = f"https://wiki.multitheftauto.com{ page_url } "
6673 if name not in result [current_category ]:
@@ -134,7 +141,7 @@ def parse_examples(content_div):
134141 return examples
135142
136143
137- def parse_note_boxes (content_div ):
144+ def parse_notes (content_div ):
138145 note_boxes = []
139146
140147 # 1. Note and warning boxes use specific class names
@@ -171,16 +178,44 @@ def parse_note_boxes(content_div):
171178 for table in content_div .find_all ("table" ):
172179 style = table .get ("style" , "" )
173180 if "98fb98" in style and "border-left" in style : # distinctive green border
174- td = table .find ("td" )
175- if td :
176- text = td .get_text (strip = True )
181+
182+ rows = table .find_all ("tr" )
183+ if not rows :
184+ continue
185+ # Get the second <td> of the first <tr>
186+ cells = rows [0 ].find_all ("td" )
187+ if len (cells ) >= 2 :
188+ message_cell = cells [1 ]
189+ text = message_cell .get_text (" " , strip = True )
177190 text = text .replace ("Tip:" , "" , 1 ).strip ()
178191 note_boxes .append ({
179192 "type" : "tip" ,
180193 "text" : text
181194 })
195+
196+ # 3. Important Note boxes also don't have class, FFB2B2 border color
197+ for table in content_div .find_all ("table" ):
198+ # Ignore it if its parent div has class "warning-messagebox" (warning boxes use the same border color)
199+ if "warning-messagebox" in table .parent .get ("class" , []):
200+ continue
201+ style = table .get ("style" , "" )
202+ if "FFB2B2" in style and "border-left" in style :
203+
204+ rows = table .find_all ("tr" )
205+ if not rows :
206+ continue
207+ # Get the second <td> of the first <tr>
208+ cells = rows [0 ].find_all ("td" )
209+ if len (cells ) >= 2 :
210+ message_cell = cells [1 ]
211+ text = message_cell .get_text (" " , strip = True )
212+ text = text .replace ("Important Note:" , "" , 1 ).strip ()
213+ note_boxes .append ({
214+ "type" : "important" ,
215+ "text" : text
216+ })
182217
183- # 3 . "This article needs checking" boxes (purple border, distinct title)
218+ # 4 . "This article needs checking" boxes (purple border, distinct title)
184219 for table in content_div .find_all ("table" ):
185220 style = table .get ("style" , "" )
186221 if "border-left: 25px solid #8181ff" in style :
@@ -201,43 +236,63 @@ def parse_note_boxes(content_div):
201236 "text" : text
202237 })
203238
204- return note_boxes
205-
239+ the_notes = []
240+ the_meta = []
241+ for note in note_boxes :
242+ if note ["type" ] == "note" or note ["type" ] == "tip" or note ["type" ] == "warning" or note ["type" ] == "important" :
243+ the_notes .append ({
244+ "type" : "info" if note ["type" ] == "note" else note ["type" ],
245+ "content" : note ["text" ]
246+ })
247+ elif note ["type" ] == "needs_checking" :
248+ the_meta .append ({
249+ "needs_checking" : note ["text" ]
250+ })
206251
207- def parse_event_page (page_url : str , category : str , name : str , source : str ) -> dict :
208- response = requests .get (page_url )
209- soup = BeautifulSoup (response .text , "html.parser" )
252+ return the_notes , the_meta
210253
211- # Find first non-empty p inside mw-content-text
212- content_div = soup .find ("div" , id = "mw-content-text" )
213- if not content_div :
214- raise ValueError (f"Could not find content in { page_url } ." )
def parse_description(content_div):
    """Extract a page's description from its content div as Markdown.

    The first non-blank <p> that appears before any <h2>/<h3> is preferred.
    If that yields nothing, the first non-blank <div> whose style attribute
    is exactly "padding: 4px 8px" is used instead.

    Args:
        content_div: Parsed element to search; it must provide ``find_all``
            (the callers pass the page's "mw-content-text" div).

    Returns:
        The stripped Markdown produced by ``convert_to_markdown``, or None
        when no usable description was found. An empty conversion result is
        reported as None too, so callers' ``is None`` checks catch it.
    """
    description = None

    for element in content_div.find_all(["p", "h2", "h3"]):
        if element.name != "p":
            # First section header reached: the introduction is over.
            break
        if element.get_text().strip():
            description = convert_to_markdown(str(element)).strip()
            break

    if not description:
        # Fallback: some pages keep the introduction in a padded div.
        for div in content_div.find_all("div", style="padding: 4px 8px"):
            if div.get_text().strip():
                description = convert_to_markdown(str(div)).strip()
                break

    # Never hand back "": this function already treats it as "not found".
    return description or None
282+
283+
284+ def parse_event_page (page_url : str , category : str , name : str , source : str ) -> dict :
285+ response = requests .get (page_url )
286+ soup = BeautifulSoup (response .text , "html.parser" )
287+
288+ # Find first non-empty p inside mw-content-text
289+ content_div = soup .find ("div" , id = "mw-content-text" )
290+ if not content_div :
291+ raise ValueError (f"Could not find content in { page_url } " )
292+
293+ event_type = "client" if "Client" in source else "server"
294+
295+ event_description = parse_description (content_div )
241296 if event_description is None :
242297 raise ValueError (f"Could not find a valid description for { name } in { page_url } " )
243298
@@ -338,8 +393,6 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
338393 if len (examples ) == 0 :
339394 print (f"Found no examples for { name } " )
340395
341- event_type = "client" if "Client" in source else "server"
342-
343396 # For each example, create a .lua file with the code
344397 # with name eventName-index.lua
345398 example_index = 1
@@ -360,20 +413,7 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
360413 })
361414 example_index += 1
362415
363- note_boxes = parse_note_boxes (content_div )
364- event_notes = []
365- event_meta = []
366- for note in note_boxes :
367- if note ["type" ] == "note" or note ["type" ] == "tip" or note ["type" ] == "warning" :
368- event_notes .append ({
369- "type" : "info" if note ["type" ] == "note" else note ["type" ],
370- "content" : note ["text" ]
371- })
372- elif note ["type" ] == "needs_checking" :
373- event_meta .append ({
374- "needs_checking" : note ["text" ]
375- })
376-
416+ event_notes , event_meta = parse_notes (content_div )
377417
378418 yaml_dict = {
379419 "name" : name ,
@@ -396,26 +436,51 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
396436
397437 return yaml_dict
398438
399- def parse_function_page (page_url : str , category : str , name : str , source : str ) -> str :
400- if source .startswith ("Shared" ):
401- yaml_content = "shared: &shared\n "
402- yaml_content += f" incomplete: true\n "
403- yaml_content += f" name: { name } \n "
404- yaml_content += f" description: TODO\n "
405- yaml_content += "\n server:\n <<: *shared"
406- yaml_content += "\n client:\n <<: *shared"
407- elif source .startswith ("Server" ):
408- yaml_content = "server:\n "
409- yaml_content += f" incomplete: true\n "
410- yaml_content += f" name: { name } \n "
411- yaml_content += f" description: TODO\n "
412- elif source .startswith ("Client" ):
413- yaml_content = "client:\n "
414- yaml_content += f" incomplete: true\n "
415- yaml_content += f" name: { name } \n "
416- yaml_content += f" description: TODO\n "
417-
418- return yaml_content
def parse_function_page(page_url: str, category: str, name: str, source: str) -> dict:
    """Scrape a function's wiki page into a dict ready for YAML dumping.

    Args:
        page_url: Full URL of the wiki page to download.
        category: Category the function is listed under (not used here).
        name: Function name, stored under the "name" key.
        source: Label of the list the link came from. "Shared" or "Server"
            in it selects the top-level key; anything else gives "client".

    Returns:
        A single-key dict mapping "shared", "server" or "client" to the
        function data. "parameters" and "examples" are left as empty lists.

    Raises:
        ValueError: If the page has no "mw-content-text" div, or no
            description could be extracted from it.
    """
    page = requests.get(page_url)
    soup = BeautifulSoup(page.text, "html.parser")
    content_div = soup.find("div", id="mw-content-text")
    if not content_div:
        raise ValueError(f"Could not find content in {page_url}")

    if "Shared" in source:
        func_type = "shared"
    elif "Server" in source:
        func_type = "server"
    else:
        func_type = "client"

    description = parse_description(content_div)
    if description is None:
        raise ValueError(f"Could not find a valid description for {name} in {page_url}")

    notes, meta = parse_notes(content_div)

    details = {
        "name": name,
        "description": description,
        "parameters": [],
        "examples": [],
        "notes": notes,
        "meta": meta,
    }
    return {func_type: details}
419484
420485def convert_page_to_yaml (page_url : str , category : str , name : str , source : str ) -> str :
421486 # This scrapes the page and tries to parse the MediaWiki content into a YAML format for the function/event
@@ -426,17 +491,17 @@ def convert_page_to_yaml(page_url: str, category: str, name: str, source: str) -
426491 raise ValueError ("Source must be either a function or an event." )
427492
428493 if is_event :
429- yaml_content = yaml .safe_dump (parse_event_page (page_url , category , name , source ),
430- sort_keys = False ,
431- allow_unicode = True ,
432- default_flow_style = False )
494+ yaml_dict = parse_event_page (page_url , category , name , source )
433495
434496 elif is_function :
435- yaml_content = parse_function_page (page_url , category , name , source )
436-
437- return yaml_content
497+ yaml_dict = parse_function_page (page_url , category , name , source )
498+
499+ return yaml .safe_dump (yaml_dict ,
500+ sort_keys = False ,
501+ allow_unicode = True ,
502+ default_flow_style = False )
438503
439- def write_yaml_per_entry (base_dir , data_by_source ):
504+ def parse_items_by_source (base_dir , data_by_source ):
440505 for source , categories in data_by_source .items ():
441506 print (f"Parsing individual pages of { source } ..." )
442507 for category , entries in categories .items ():
@@ -464,9 +529,16 @@ def main():
464529 events_by_source = {}
465530
466531 # Functions
532+ # functions_by_source["Shared functions"] = parse_links("Shared functions", URL_SHARED_FUNCS)
467533 # functions_by_source["Client functions"] = parse_links("Client functions", URL_CLIENT_FUNCS)
468534 # functions_by_source["Server functions"] = parse_links("Server functions", URL_SERVER_FUNCS)
469- # functions_by_source["Shared functions"] = parse_links("Shared functions", URL_SHARED_FUNCS)
535+
536+ # TEST Parse only these:
537+ # functions_by_source["Shared functions"] = {
538+ # "Element": [
539+ # ("https://wiki.multitheftauto.com/wiki/SetElementParent", "setElementParent"),
540+ # ]
541+ # }
470542
471543 # Events
472544 events_by_source ["Client events" ] = parse_links ("Client events" , URL_CLIENT_EVENTS )
@@ -476,8 +548,8 @@ def main():
476548 if os .path .exists ("./output" ):
477549 shutil .rmtree ("./output" )
478550
479- write_yaml_per_entry (FUNCTIONS_DIR , functions_by_source )
480- write_yaml_per_entry (EVENTS_DIR , events_by_source )
551+ parse_items_by_source (FUNCTIONS_DIR , functions_by_source )
552+ parse_items_by_source (EVENTS_DIR , events_by_source )
481553
482554if __name__ == "__main__" :
483555 main ()
0 commit comments