Bmansurov has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/362297 )
Change subject: Allow certain media elements not to be removed ...................................................................... Allow certain media elements not to be removed Before this patch, if media was set to be removed, all media files were removed from the output. This patch allows media files with certain class names to stay in the output. For example, the following code removes all images, while keeping math formulas rendered as an image: $fmt = new ExtractFormatter( 'some html text' ); $fmt->setRemoveMedia( true ); $fmt->keepMediaClass( [ 'img' => [ 'mwe-math-fallback-image', 'mwe-math-fallback-image-inline' ] ] ); // $fmt->getText(); Bug: T159065 Change-Id: Id1934e4fb9aaa16e6f0de2db278817b7149c2923 --- M src/HtmlFormatter.php 1 file changed, 32 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/HtmlFormatter refs/changes/97/362297/1 diff --git a/src/HtmlFormatter.php b/src/HtmlFormatter.php index e93ab1f..a185a4d 100644 --- a/src/HtmlFormatter.php +++ b/src/HtmlFormatter.php @@ -33,7 +33,14 @@ private $itemsToRemove = []; private $elementsToFlatten = []; protected $removeMedia = false; - + /** + * @var array Media classes to keep when `$this->removeMedia` is `true`. + * + * For example, the value [ 'img' => [ 'mwe-math-fallback-image-inline' ] ] + * keeps `img` elements with class name `mwe-math-fallback-image-inline`, + * while all other `img` elements are removed. + */ + private $mediaClassesToKeep = []; /** * Constructor * @@ -92,6 +99,16 @@ */ public function setRemoveMedia( $flag = true ) { $this->removeMedia = $flag; + } + + /** + * Adds class names of images/videos/sounds to keep. + * @param array $classes Array key is a tag name, e.g. 'img', and array + * value is an array of class names, e.g. 
[ 'mwe-math-fallback-image' ] + */ + public function keepMediaClass( $classes ) { + $this->mediaClassesToKeep = array_merge( + $this->mediaClassesToKeep, $classes ); } /** @@ -159,8 +176,21 @@ foreach ( $removals['TAG'] as $tagToRemove ) { $tagToRemoveNodes = $doc->getElementsByTagName( $tagToRemove ); foreach ( $tagToRemoveNodes as $tagToRemoveNode ) { + $shouldRemoveTag = true; if ( $tagToRemoveNode ) { - $domElemsToRemove[] = $tagToRemoveNode; + if ( array_key_exists( $tagToRemove, $this->mediaClassesToKeep ) ) { + $classes = $tagToRemoveNode->getAttribute( 'class' ); + $mediaClassesToKeep = $this->mediaClassesToKeep[ $tagToRemove ]; + foreach ( $mediaClassesToKeep as $classToKeep ) { + if ( \preg_match( "/\b$classToKeep\b/", $classes ) ) { + $shouldRemoveTag = false; + break; + } + } + } + if ( $shouldRemoveTag ) { + $domElemsToRemove[] = $tagToRemoveNode; + } } } } -- To view, visit https://gerrit.wikimedia.org/r/362297 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Id1934e4fb9aaa16e6f0de2db278817b7149c2923 Gerrit-PatchSet: 1 Gerrit-Project: HtmlFormatter Gerrit-Branch: master Gerrit-Owner: Bmansurov <bmansu...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits