diff --git a/CHANGELOG.md b/CHANGELOG.md index dfcce5a4d2..e7b7ed6869 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ v0.15.0 (?? ??? 2018) - Add support for table indent (tblInd) @Trainmaster #1343 - Added parsing of internal links in HTML reader @lalop #1336 - Several improvements to charts @JAEK-S #1332 +- Add parsing of html image in base64 format @jgpATs2w #1382 ### Fixed - Fix reading of docx default style - @troosan #1238 diff --git a/samples/resources/Sample_30_ReadHTML.html b/samples/resources/Sample_30_ReadHTML.html index 5593298bfb..b3d2fad222 100644 --- a/samples/resources/Sample_30_ReadHTML.html +++ b/samples/resources/Sample_30_ReadHTML.html @@ -11,5 +11,15 @@

Adding element via HTML

Ordered (numbered) list:

  1. Item 1
  2. Item 2
+ +

Double height

+ +

Includes images

+ + + + + + diff --git a/samples/results/.gitignore b/samples/results/.gitignore old mode 100644 new mode 100755 diff --git a/src/PhpWord/Shared/Html.php b/src/PhpWord/Shared/Html.php index cbdcecd537..239cfd1dbb 100644 --- a/src/PhpWord/Shared/Html.php +++ b/src/PhpWord/Shared/Html.php @@ -20,6 +20,7 @@ use PhpOffice\PhpWord\Element\AbstractContainer; use PhpOffice\PhpWord\Element\Row; use PhpOffice\PhpWord\Element\Table; +use PhpOffice\PhpWord\Settings; use PhpOffice\PhpWord\SimpleType\Jc; use PhpOffice\PhpWord\SimpleType\NumberFormat; @@ -32,6 +33,7 @@ class Html { private static $listIndex = 0; private static $xpath; + private static $options; /** * Add HTML parts. @@ -44,13 +46,17 @@ class Html * @param string $html The code to parse * @param bool $fullHTML If it's a full HTML, no need to add 'body' tag * @param bool $preserveWhiteSpace If false, the whitespaces between nodes will be removed + * @param array $options: + * + IMG_SRC_SEARCH: optional to speed up images loading from remote url when files can be found locally + * + IMG_SRC_REPLACE: optional to speed up images loading from remote url when files can be found locally */ - public static function addHtml($element, $html, $fullHTML = false, $preserveWhiteSpace = true) + public static function addHtml($element, $html, $fullHTML = false, $preserveWhiteSpace = true, $options = null) { /* * @todo parse $stylesheet for default styles. Should result in an array based on id, class and element, * which could be applied when such an element occurs in the parseNode function. */ + self::$options = $options; // Preprocess: remove all line ends, decode HTML entity, // fix ampersand and angle brackets and add body tag for HTML fragments @@ -141,6 +147,7 @@ protected static function parseNode($node, $element, $styles = array(), $data = 'sup' => array('Property', null, null, $styles, null, 'superScript', true), 'sub' => array('Property', null, null, $styles, null, 'subScript', true), 'span' => array('Span', $node, null, $styles, null, null, null), + 'font' => array('Span', $node, null, $styles, null, null, null), 'table' => array('Table', $node, $element, $styles, null, null, null), 'tr' => array('Row', $node, $element, $styles, null, null, null), 'td' => array('Cell', $node, $element, $styles, null, null, null), @@ -648,7 +655,52 @@ private static function parseImage($node, $element) break; } } - $newElement = $element->addImage($src, $style); + $originSrc = $src; + if (strpos($src, 'data:image') !== false) { + $tmpDir = Settings::getTempDir() . '/'; + + $match = array(); + preg_match('/data:image\/(\w+);base64,(.+)/', $src, $match); + + $src = $imgFile = $tmpDir . uniqid() . '.' . $match[1]; + + $ifp = fopen($imgFile, 'wb'); + + if ($ifp !== false) { + fwrite($ifp, base64_decode($match[2])); + fclose($ifp); + } + } + $src = urldecode($src); + + if (!is_file($src) + && !is_null(self::$options) + && isset(self::$options['IMG_SRC_SEARCH']) + && isset(self::$options['IMG_SRC_REPLACE'])) { + $src = str_replace(self::$options['IMG_SRC_SEARCH'], self::$options['IMG_SRC_REPLACE'], $src); + } + + if (!is_file($src)) { + if ($imgBlob = @file_get_contents($src)) { + $tmpDir = Settings::getTempDir() . '/'; + $match = array(); + preg_match('/.+\.(\w+)$/', $src, $match); + $src = $tmpDir . uniqid() . '.' . $match[1]; + + $ifp = fopen($src, 'wb'); + + if ($ifp !== false) { + fwrite($ifp, $imgBlob); + fclose($ifp); + } + } + } + + if (is_file($src)) { + $newElement = $element->addImage($src, $style); + } else { + throw new \Exception("Could not load image $originSrc"); + } return $newElement; } diff --git a/tests/PhpWord/Shared/HtmlTest.php b/tests/PhpWord/Shared/HtmlTest.php index b61418e01c..32f4bf460c 100644 --- a/tests/PhpWord/Shared/HtmlTest.php +++ b/tests/PhpWord/Shared/HtmlTest.php @@ -116,6 +116,21 @@ public function testParseTextDecoration() $this->assertEquals('single', $doc->getElementAttribute('/w:document/w:body/w:p/w:r/w:rPr/w:u', 'w:val')); } + /** + * Test font + */ + public function testParseFont() + { + $html = 'test'; + $phpWord = new \PhpOffice\PhpWord\PhpWord(); + $section = $phpWord->addSection(); + Html::addHtml($section, $html); + + $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); + $this->assertTrue($doc->elementExists('/w:document/w:body/w:p/w:r/w:rPr')); + //TODO check style + } + /** * Test line-height style */ @@ -447,6 +462,78 @@ public function testParseImage() $this->assertStringMatchesFormat('%Smso-position-horizontal:left%S', $doc->getElementAttribute($baseXpath . '[2]/w:pict/v:shape', 'style')); } + /** + * Test parsing of remote img + */ + public function testParseRemoteImage() + { + $src = 'https://phpword.readthedocs.io/en/latest/_images/phpword.png'; + + $phpWord = new \PhpOffice\PhpWord\PhpWord(); + $section = $phpWord->addSection(); + $html = '

'; + Html::addHtml($section, $html); + + $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); + + $baseXpath = '/w:document/w:body/w:p/w:r'; + $this->assertTrue($doc->elementExists($baseXpath . '/w:pict/v:shape')); + } + + /** + * Test parsing embedded image + */ + public function testParseEmbeddedImage() + { + $phpWord = new \PhpOffice\PhpWord\PhpWord(); + $section = $phpWord->addSection(); + $html = '

'; + Html::addHtml($section, $html); + + $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); + + $baseXpath = '/w:document/w:body/w:p/w:r'; + $this->assertTrue($doc->elementExists($baseXpath . '/w:pict/v:shape')); + } + + /** + * Test parsing of remote img that can be found locally + */ + public function testParseRemoteLocalImage() + { + $src = 'https://fakedomain.io/images/firefox.png'; + $localPath = __DIR__ . '/../_files/images/'; + $options = array( + 'IMG_SRC_SEARCH' => 'https://fakedomain.io/images/', + 'IMG_SRC_REPLACE' => $localPath, + ); + + $phpWord = new \PhpOffice\PhpWord\PhpWord(); + $section = $phpWord->addSection(); + $html = '

'; + Html::addHtml($section, $html, false, true, $options); + + $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); + + $baseXpath = '/w:document/w:body/w:p/w:r'; + $this->assertTrue($doc->elementExists($baseXpath . '/w:pict/v:shape')); + } + + /** + * Test parsing of remote img that can be found locally + * + * @expectedException \Exception + */ + public function testCouldNotLoadImage() + { + $src = 'https://fakedomain.io/images/firefox.png'; + + $phpWord = new \PhpOffice\PhpWord\PhpWord(); + $section = $phpWord->addSection(); + $html = '

'; + Html::addHtml($section, $html, false, true); + } + public function testParseLink() { $phpWord = new \PhpOffice\PhpWord\PhpWord();