|
| 1 | +from typing import List |
| 2 | + |
1 | 3 | import pandas as pd |
2 | 4 |
|
| 5 | +from pandas._typing import FilePathOrBuffer, Scalar |
| 6 | + |
3 | 7 | from pandas.compat._optional import import_optional_dependency |
4 | 8 |
|
5 | | -from pandas.io.parsers import TextParser |
| 9 | +from pandas.io.excel._base import _BaseExcelReader |
6 | 10 |
|
7 | 11 |
|
8 | | -class _ODFReader: |
| 12 | +class _ODFReader(_BaseExcelReader): |
9 | 13 | """Read tables out of OpenDocument formatted files |
10 | 14 |
|
11 | 15 | Parameters |
12 | 16 | ---------- |
13 | 17 | filepath_or_buffer: string, path to be parsed or |
14 | 18 | an open readable stream. |
15 | 19 | """ |
16 | | - def __init__(self, filepath_or_buffer): |
def __init__(self, filepath_or_buffer: FilePathOrBuffer):
    """Initialise the reader for an OpenDocument file.

    Parameters
    ----------
    filepath_or_buffer : FilePathOrBuffer
        Forwarded unchanged to the base-class initialiser.
    """
    # The optional "odf" dependency is checked first, before the base class
    # is given the input.
    import_optional_dependency("odf")
    super().__init__(filepath_or_buffer)
21 | 23 |
|
@property
def _workbook_class(self):
    """Return the ``OpenDocument`` class defined in ``odf.opendocument``."""
    # Function-local import: odf is only loaded when this property is read.
    import odf.opendocument as opendocument
    return opendocument.OpenDocument
| 28 | + |
def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
    """Load ``filepath_or_buffer`` with ``odf.opendocument.load``.

    Returns whatever ``load`` returns for the given input.
    """
    # Function-local import: odf is only loaded when a workbook is opened.
    from odf.opendocument import load as load_document
    return load_document(filepath_or_buffer)
| 32 | + |
@property
def sheet_names(self) -> List[str]:
    """Return a list of sheet names present in the document"""
    from odf.namespaces import TABLENS
    from odf.table import Table

    # Each table's name is stored under the (TABLENS, "name") attribute key.
    name_key = (TABLENS, 'name')
    names = []
    for table in self.book.getElementsByType(Table):
        names.append(table.attributes[name_key])
    return names
30 | 41 |
|
31 | | - def get_sheet_by_name(self, name): |
32 | | - i = self.sheet_names.index(name) |
33 | | - return self.tables[i] |
def get_sheet_by_index(self, index: int):
    """Return the table at position ``index`` among the document's tables.

    The tables are taken in the order returned by
    ``self.book.getElementsByType(Table)``.
    """
    from odf.table import Table

    return self.book.getElementsByType(Table)[index]
34 | 46 |
|
35 | | - def _get_sheet(self, name): |
36 | | - """Given a sheet name or index, return the root ODF Table node |
37 | | - """ |
38 | | - if isinstance(name, str): |
39 | | - return self.get_sheet_by_name(name) |
40 | | - elif isinstance(name, int): |
41 | | - return self.get_sheet_by_index(name) |
42 | | - else: |
43 | | - raise ValueError( |
44 | | - 'Unrecognized sheet identifier type {}. Please use' |
45 | | - 'a string or integer'.format(type(name))) |
def get_sheet_by_name(self, name: str):
    """Return the table whose ``(TABLENS, "name")`` attribute equals ``name``.

    Parameters
    ----------
    name : str
        Sheet name to look for.

    Returns
    -------
    The first matching table element found in ``self.book``.

    Raises
    ------
    ValueError
        If no table in the document has that name.
    """
    from odf.namespaces import TABLENS
    from odf.table import Table

    tables = self.book.getElementsByType(Table)

    key = (TABLENS, "name")
    for table in tables:
        if table.attributes[key] == name:
            return table

    # The placeholder is named, so the value must be supplied by keyword;
    # passing it positionally raises KeyError('name') instead of this error.
    raise ValueError("sheet {name} not found".format(name=name))
52 | 59 |
|
53 | 60 | def get_sheet_data(self, sheet, convert_float): |
54 | 61 | """Parse an ODF Table into a list of lists |
@@ -97,7 +104,6 @@ def get_sheet_data(self, sheet, convert_float): |
97 | 104 |
|
98 | 105 | def _get_row_repeat(self, row): |
99 | 106 | """Return number of times this row was repeated |
100 | | -
|
101 | 107 | Repeating an empty row appeared to be a common way |
102 | 108 | of representing sparse rows in the table. |
103 | 109 | """ |
|
0 commit comments