@@ -387,7 +387,7 @@ def _iterparse_nodes(self, iterparse: Callable) -> list[dict[str, str | None]]:
387387
388388 return dicts
389389
390- def _validate_path (self ) -> None :
390+ def _validate_path (self ) -> list [ Any ] :
391391 """
392392 Validate xpath.
393393
@@ -446,8 +446,7 @@ def parse_data(self) -> list[dict[str, str | None]]:
446446
447447 if self .iterparse is None :
448448 self .xml_doc = self ._parse_doc (self .path_or_buffer )
449- self ._validate_path ()
450- elems = self .xml_doc .findall (self .xpath , namespaces = self .namespaces )
449+ elems = self ._validate_path ()
451450
452451 self ._validate_names ()
453452
@@ -459,7 +458,7 @@ def parse_data(self) -> list[dict[str, str | None]]:
459458
460459 return xml_dicts
461460
462- def _validate_path (self ) -> None :
461+ def _validate_path (self ) -> list [ Any ] :
463462 """
464463 Notes
465464 -----
@@ -468,18 +467,28 @@ def _validate_path(self) -> None:
468467 """
469468
470469 msg = (
471- "xpath does not return any nodes. "
470+ "xpath does not return any nodes or attributes. "
471+ "Be sure to specify in `xpath` the parent nodes of "
472+ "children and attributes to parse. "
472473 "If document uses namespaces denoted with "
473474 "xmlns, be sure to define namespaces and "
474475 "use them in xpath."
475476 )
476477 try :
477- elems = self .xml_doc .find (self .xpath , namespaces = self .namespaces )
478+ elems = self .xml_doc .findall (self .xpath , namespaces = self .namespaces )
479+ children = [ch for el in elems for ch in el .findall ("*" )]
480+ attrs = {k : v for el in elems for k , v in el .attrib .items ()}
481+
478482 if elems is None :
479483 raise ValueError (msg )
480484
481- if elems is not None and elems .find ("*" ) is None and elems .attrib is None :
482- raise ValueError (msg )
485+ if elems is not None :
486+ if self .elems_only and children == []:
487+ raise ValueError (msg )
488+ elif self .attrs_only and attrs == {}:
489+ raise ValueError (msg )
490+ elif children == [] and attrs == {}:
491+ raise ValueError (msg )
483492
484493 except (KeyError , SyntaxError ):
485494 raise SyntaxError (
@@ -488,6 +497,8 @@ def _validate_path(self) -> None:
488497 "undeclared namespace prefix."
489498 )
490499
500+ return elems
501+
491502 def _validate_names (self ) -> None :
492503 children : list [Any ]
493504
@@ -554,8 +565,7 @@ def parse_data(self) -> list[dict[str, str | None]]:
554565 self .xsl_doc = self ._parse_doc (self .stylesheet )
555566 self .xml_doc = self ._transform_doc ()
556567
557- self ._validate_path ()
558- elems = self .xml_doc .xpath (self .xpath , namespaces = self .namespaces )
568+ elems = self ._validate_path ()
559569
560570 self ._validate_names ()
561571
@@ -567,25 +577,33 @@ def parse_data(self) -> list[dict[str, str | None]]:
567577
568578 return xml_dicts
569579
def _validate_path(self) -> list[Any]:
    """
    Evaluate ``self.xpath`` on the parsed document and validate the result.

    The xpath is expected to select the *parent* nodes whose child
    elements and/or attributes will be parsed.

    Returns
    -------
    list
        The result of ``self.xml_doc.xpath(self.xpath, ...)``, returned so
        the caller does not have to evaluate the expression a second time.

    Raises
    ------
    ValueError
        * If the xpath result is empty.
        * If ``self.elems_only`` is set and no selected node has a child
          element.
        * If ``self.attrs_only`` is set and no selected node has an
          attribute.
        * If the selected nodes have neither child elements nor attributes.
    """

    msg = (
        "xpath does not return any nodes or attributes. "
        "Be sure to specify in `xpath` the parent nodes of "
        "children and attributes to parse. "
        "If document uses namespaces denoted with "
        "xmlns, be sure to define namespaces and "
        "use them in xpath."
    )

    elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces)

    # Fail fast on an empty result; nothing below is meaningful without
    # at least one selected node.
    if not elems:
        raise ValueError(msg)

    # Only existence matters here, so short-circuit with any() instead of
    # materialising every child element and attribute of every node.
    has_children = any(len(el.xpath("*")) > 0 for el in elems)
    has_attrs = any(len(el.attrib) > 0 for el in elems)

    if (
        (self.elems_only and not has_children)
        or (self.attrs_only and not has_attrs)
        or not (has_children or has_attrs)
    ):
        raise ValueError(msg)

    return elems
589607
590608 def _validate_names (self ) -> None :
591609 children : list [Any ]
0 commit comments