Skip to content

Commit a7d38ff

Browse files
committed
[feat] Add support for parsing JSON with text prefix and postfix
1 parent d835069 commit a7d38ff

File tree

3 files changed

+104
-2
lines changed

3 files changed

+104
-2
lines changed

README.md

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,28 @@ If the JSON string is malformed, the `parse` function will throw an error:
102102
loads("wrong") # MalformedJSON: Malformed node or string on line 1
103103
```
104104

105+
### Handling text around JSON
106+
107+
Sometimes JSON is embedded in surrounding text. You can use the `PREFIX` and `POSTFIX` options to handle this:
108+
109+
```python
110+
from partial_json_parser import loads, PREFIX, POSTFIX
111+
112+
# Handle text before JSON
113+
result = loads('This is your JSON: {"key": "value"}', PREFIX)
114+
print(result) # Outputs: {'key': 'value'}
115+
116+
# Handle text after JSON
117+
result = loads('{"key": "value"} - end of JSON', POSTFIX)
118+
print(result) # Outputs: {'key': 'value'}
119+
120+
# Handle both
121+
result = loads('Start of JSON: {"key": "value"} - end of JSON', PREFIX | POSTFIX)
122+
print(result) # Outputs: {'key': 'value'}
123+
```
124+
125+
Note that `PREFIX` looks for the first `{` or `[` character and `POSTFIX` looks for the last `}` or `]` character to determine the JSON boundaries.
126+
105127
## API Reference
106128

107129
### loads(json_string, [allow_partial], [parser])
@@ -149,7 +171,9 @@ Enum class that specifies what kind of partialness is allowed during JSON parsin
149171
- `SPECIAL`: Allow all special values.
150172
- `ATOM`: Allow all atomic values.
151173
- `COLLECTION`: Allow all collection values.
152-
- `ALL`: Allow all values.
174+
- `PREFIX`: Allow text before the JSON string starts (e.g. `This is your JSON: {"key": "value"}`).
175+
- `POSTFIX`: Allow text after the JSON string ends (e.g. `{"key": "value"} - end of JSON`).
176+
- `ALL`: Allow all values, including `PREFIX` and `POSTFIX`.
153177

154178
## Testing
155179

src/partial_json_parser/core/myelin.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,78 @@ def join_closing_tokens(stack: List[Tuple[int, str]]):
2020

2121
def fix_fast(json_string: str, allow_partial: Union[Allow, int] = ALL):
    """Repair a possibly partial JSON string, optionally ignoring surrounding text.

    Args:
        json_string: Raw input. It may carry non-JSON text before and/or after
            the JSON value when ``PREFIX`` / ``POSTFIX`` are allowed.
        allow_partial: ``Allow`` flags (or the equivalent ``int``) selecting
            which kinds of partialness are tolerated.

    Returns:
        Whatever ``_fix`` returns for the trimmed string and effective flags.
    """
    allow = Allow(allow_partial)

    if PREFIX in allow:
        # Drop everything before the earliest opening brace/bracket, if any.
        openers = [
            pos
            for pos in (json_string.find("{"), json_string.find("["))
            if pos != -1
        ]
        if openers:
            json_string = json_string[min(openers):]

    if POSTFIX in allow:
        # Trim only when the leading collection is actually closed.  Cutting at
        # the *last* '}' / ']' (the previous behaviour) discarded the trailing
        # part of partial input such as '{"a": [1], "b": "hel' and was fooled
        # by brackets in the trailing text or inside string values.
        end = _find_json_end(json_string)
        if end != -1:
            json_string = json_string[: end + 1]

    # NOTE(review): STR is force-enabled whenever PREFIX/POSTFIX is set, which
    # overrides a caller that deliberately excluded STR. Kept for backward
    # compatibility -- confirm whether this is intended.
    if PREFIX in allow or POSTFIX in allow:
        allow = Allow(allow | STR)

    return _fix(json_string, allow, True)


def _find_json_end(text: str) -> int:
    """Return the index of the closer matching the leading '{' or '[', or -1.

    Leading whitespace is skipped. Brackets inside double-quoted strings are
    ignored and backslash escapes are honoured. Returns -1 when *text* does not
    start with a collection or when that collection is never closed (partial
    input). Bracket kinds are counted, not paired; mismatches are left for the
    parser to report.
    """
    start = len(text) - len(text.lstrip())
    if start >= len(text) or text[start] not in "{[":
        return -1

    depth = 0
    in_string = False
    escaped = False
    for index in range(start, len(text)):
        char = text[index]
        if in_string:
            if escaped:
                escaped = False  # this character is consumed by the backslash
            elif char == "\\":
                escaped = True
            elif char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        elif char in "{[":
            depth += 1
        elif char in "}]":
            depth -= 1
            if depth <= 0:
                return index
    return -1
49+
50+
51+
def fix_fast_old(json_string: str, allow_partial: Union[Allow, int] = ALL):
52+
allow = Allow(allow_partial)
53+
original_allow = allow
54+
55+
# Handle PREFIX by finding first { or [
56+
if PREFIX in allow:
57+
first_brace = json_string.find('{')
58+
first_bracket = json_string.find('[')
59+
60+
if first_brace != -1 and (first_bracket == -1 or first_brace < first_bracket):
61+
json_string = json_string[first_brace:]
62+
elif first_bracket != -1:
63+
json_string = json_string[first_bracket:]
64+
65+
# Handle POSTFIX by finding matching closing brace/bracket
66+
if POSTFIX in allow:
67+
# Find opening token
68+
first_char = json_string[0] if json_string else ''
69+
if first_char not in '{[':
70+
# No valid JSON start found
71+
return _fix(json_string, original_allow, True)
72+
73+
# Find matching closing token
74+
closing_char = '}' if first_char == '{' else ']'
75+
stack = []
76+
in_string = False
77+
78+
for i, char in enumerate(json_string):
79+
if char == '"' and (i == 0 or json_string[i-1] != '\\'):
80+
in_string = not in_string
81+
elif not in_string:
82+
if char in '{[':
83+
stack.append(char)
84+
elif char in ']}':
85+
if not stack:
86+
break
87+
if (char == '}' and stack[-1] == '{') or (char == ']' and stack[-1] == '['):
88+
stack.pop()
89+
if not stack: # Found matching closing token
90+
json_string = json_string[:i+1]
91+
break
92+
93+
# Remove PREFIX/POSTFIX from allow since we've handled them
94+
allow = Allow(allow & ~(PREFIX | POSTFIX))
2395

2496
def is_escaped(index: int):
2597
text_before = json_string[:index]

src/partial_json_parser/core/options.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@ class Allow(IntFlag):
1313
NAN = auto()
1414
INFINITY = auto()
1515
_INFINITY = auto()
16+
PREFIX = auto()
17+
POSTFIX = auto()
1618

1719
INF = INFINITY | _INFINITY
1820
SPECIAL = NULL | BOOL | INF | NAN
1921
ATOM = STR | NUM | SPECIAL
2022
COLLECTION = ARR | OBJ
21-
ALL = ATOM | COLLECTION
23+
ALL = ATOM | COLLECTION | PREFIX | POSTFIX
2224

2325

2426
STR = Allow.STR
@@ -35,6 +37,8 @@ class Allow(IntFlag):
3537
ATOM = Allow.ATOM
3638
COLLECTION = Allow.COLLECTION
3739
ALL = Allow.ALL
40+
PREFIX = Allow.PREFIX
41+
POSTFIX = Allow.POSTFIX
3842

3943

4044
__all__ = [
@@ -53,4 +57,6 @@ class Allow(IntFlag):
5357
"ATOM",
5458
"COLLECTION",
5559
"ALL",
60+
"PREFIX",
61+
"POSTFIX",
5662
]

0 commit comments

Comments
 (0)