2525from . import Extension
2626from ..blockprocessors import BlockProcessor
2727from ..inlinepatterns import InlineProcessor
28- from ..util import AtomicString
28+ from ..treeprocessors import Treeprocessor
29+ from ..util import AtomicString , deprecated
30+ from typing import TYPE_CHECKING
2931import re
3032import xml .etree .ElementTree as etree
3133
34+ if TYPE_CHECKING : # pragma: no cover
35+ from .. import Markdown
36+ from ..blockparsers import BlockParser
37+
3238
class AbbrExtension(Extension):
    """ Python-Markdown extension which adds support for abbreviations. """

    def __init__(self, **kwargs):
        """ Initialize the base `Extension`, then create the empty abbreviation store. """
        super().__init__(**kwargs)
        # This one dict object is handed to both processors in `extendMarkdown`.
        self.abbrs = {}

    def reset(self):
        """ Forget every abbreviation collected so far. """
        # Emptied in place (not rebound), so the same dict object stays in use.
        self.abbrs.clear()

    def extendMarkdown(self, md):
        """ Register this extension, then `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
        md.registerExtension(self)

        tree_processor = AbbrTreeprocessor(md, self.abbrs)
        md.treeprocessors.register(tree_processor, 'abbr', 7)

        block_processor = AbbrBlockprocessor(md.parser, self.abbrs)
        md.parser.blockprocessors.register(block_processor, 'abbr', 16)
56+
57+
class AbbrTreeprocessor(Treeprocessor):
    """ Replace abbreviation text with `<abbr>` elements. """

    def __init__(self, md: Markdown | None = None, abbrs: dict | None = None):
        """
        Initialize the processor.

        Arguments:
            md: Passed through to the `Treeprocessor` initializer.
            abbrs: Mapping of abbreviation text to its title. The mapping is used as
                given (not copied); a new empty dict is created when omitted.
        """
        self.abbrs: dict = abbrs if abbrs is not None else {}
        # Compiled at the start of each `run` from the abbreviations known at that time.
        self.RE: re.Pattern[str] | None = None
        super().__init__(md)

    def _wrap_matches(self, text: str, container: etree.Element, index: int) -> str:
        """
        Insert an `abbr` element into `container` at `index` for every match in `text`.

        Returns the part of `text` which precedes the first match (all of `text` when
        nothing matched). The text following each match becomes the tail of the
        corresponding `abbr` element.
        """
        # Matches are handled right to left. Each element is inserted at the same
        # `index`, so earlier matches end up in front of later ones, and `text` is
        # trimmed back to the start of the match just handled.
        for m in reversed(list(self.RE.finditer(text))):
            abbr = etree.Element('abbr', {'title': self.abbrs[m.group(0)]})
            abbr.text = AtomicString(m.group(0))
            abbr.tail = text[m.end():]
            container.insert(index, abbr)
            text = text[:m.start()]
        return text

    def iter_element(self, el: etree.Element, parent: etree.Element | None = None) -> None:
        """
        Recursively iterate over elements, run regex on text and wrap matches in `abbr` tags.

        Arguments:
            el: The element whose children, text and tail are processed.
            parent: The element containing `el`, or `None` for the root. The tail of
                `el` is only processed when a parent is given, because the new `abbr`
                elements are inserted into the parent directly after `el`.
        """
        # Children are visited last to first; elements inserted for a child's tail land
        # after that child, i.e. among the siblings which were already visited.
        for child in reversed(el):
            self.iter_element(child, el)
        text = el.text
        # `AtomicString` marks text which must not be processed any further.
        if text and not isinstance(text, AtomicString):
            el.text = self._wrap_matches(text, el, 0)
        tail = el.tail
        # Compare with `None` explicitly: the truth value of an `Element` reflects its
        # number of children and testing it is deprecated.
        if parent is not None and tail and not isinstance(tail, AtomicString):
            index = list(parent).index(el) + 1
            el.tail = self._wrap_matches(tail, parent, index)

    def run(self, root: etree.Element) -> etree.Element | None:
        """
        Step through tree to find known abbreviations.

        Always returns `None`; `root` is modified in place.
        """
        # Alternation picks the first alternative that matches, so longer abbreviations
        # must come first or a shorter one (`ABBR`) would shadow a longer one which
        # contains it at a word boundary (`ABBR-abbr`). Empty keys are dropped, as they
        # would produce a zero-width match at every word boundary.
        keys = sorted((key for key in self.abbrs if key), key=len, reverse=True)
        if not keys:
            # No abbreviations defined. Skip running processor.
            return None
        # Build and compile regex
        self.RE = re.compile(f"\\b(?:{'|'.join(re.escape(key) for key in keys)})\\b")
        # Step through tree and modify on matches
        self.iter_element(root)
        return None
98+
99+
100+ class AbbrBlockprocessor (BlockProcessor ):
101+ """ Parse text for abbreviation references. """
43102
44103 RE = re .compile (r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$' , re .MULTILINE )
45104
105+ def __init__ (self , parser : BlockParser , abbrs : dict ):
106+ self .abbrs : dict = abbrs
107+ super ().__init__ (parser )
108+
46109 def test (self , parent : etree .Element , block : str ) -> bool :
47110 return True
48111
49112 def run (self , parent : etree .Element , blocks : list [str ]) -> bool :
50113 """
51- Find and remove all Abbreviation references from the text.
52- Each reference is set as a new `AbbrPattern` in the markdown instance .
114+ Find and remove all abbreviation references from the text.
115+ Each reference is added to the abbreviation collection .
53116
54117 """
55118 block = blocks .pop (0 )
56119 m = self .RE .search (block )
57120 if m :
58121 abbr = m .group ('abbr' ).strip ()
59122 title = m .group ('title' ).strip ()
60- self .parser .md .inlinePatterns .register (
61- AbbrInlineProcessor (self ._generate_pattern (abbr ), title ), 'abbr-%s' % abbr , 2
62- )
123+ self .abbrs [abbr ] = title
63124 if block [m .end ():].strip ():
64125 # Add any content after match back to blocks as separate block
65126 blocks .insert (0 , block [m .end ():].lstrip ('\n ' ))
@@ -71,11 +132,11 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
71132 blocks .insert (0 , block )
72133 return False
73134
74- def _generate_pattern (self , text : str ) -> str :
75- """ Given a string, returns a regex pattern to match that string. """
76- return f"(?P<abbr>\\ b{ re .escape (text ) } \\ b)"
135+
# Previous name of `AbbrBlockprocessor`, still exported here but wrapped by `deprecated`
# with a message pointing at the new name.
AbbrPreprocessor = deprecated(
    "This class has been renamed to `AbbrBlockprocessor`."
)(AbbrBlockprocessor)
77137
78138
139+ @deprecated ("This class will be removed in the future; use `AbbrTreeprocessor` instead." )
79140class AbbrInlineProcessor (InlineProcessor ):
80141 """ Abbreviation inline pattern. """
81142
0 commit comments