Remove anchors from content with ContentRewriter

I want to remove anchor tags using a custom ContentRewriter.

I am using the following code:

import org.hippoecm.hst.content.rewriter.impl.SimpleContentRewriter;
import org.hippoecm.hst.core.request.HstRequestContext;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.jcr.Node;

/**
 * Content rewriter that runs the incoming HTML through a shared
 * {@link HtmlCleaner} configured with {@code "a"} as a prune tag, and returns
 * the inner HTML of the cleaned document.
 */
public class ExternalLinksRemoveContentRewriter
        extends SimpleContentRewriter {

    private static final Logger log =
            LoggerFactory.getLogger(ExternalLinksRemoveContentRewriter.class);

    /**
     * Lazy holder for the shared cleaner. The JVM initializes this class on
     * first access and guarantees that the fully configured instance is
     * safely published, so no flag or explicit locking is needed.
     */
    private static final class CleanerHolder {
        private static final HtmlCleaner INSTANCE = createCleaner();

        private CleanerHolder() {
        }
    }

    /**
     * Builds and configures the cleaner used by {@link #getHtmlCleaner()}.
     *
     * @return a new, configured cleaner
     */
    private static HtmlCleaner createCleaner() {
        final HtmlCleaner htmlCleaner = new HtmlCleaner();
        final CleanerProperties properties = htmlCleaner.getProperties();
        properties.setOmitHtmlEnvelope(true);
        properties.setTranslateSpecialEntities(false);
        properties.setOmitXmlDeclaration(true);
        properties.setRecognizeUnicodeChars(false);
        properties.setOmitComments(true);
        // Anchors are configured as prune tags so that they are removed.
        properties.setPruneTags("a");
        return htmlCleaner;
    }

    /**
     * Returns the shared cleaner, creating it on first use.
     *
     * NOTE(review): the same instance is handed to every caller; confirm that
     * HtmlCleaner is safe for concurrent use.
     *
     * @return the shared, configured cleaner
     */
    protected static HtmlCleaner getHtmlCleaner() {
        return CleanerHolder.INSTANCE;
    }

    /**
     * Cleans the given HTML with the shared cleaner.
     *
     * @param html           the HTML to clean, may be {@code null}
     * @param node           the content node (not used by this implementation)
     * @param requestContext the current request context (not used by this
     *                       implementation)
     * @return the inner HTML of the cleaned document, or {@code null} when
     *         {@code html} is {@code null}
     */
    @Override
    public String rewrite(final String html, final Node node,
                          final HstRequestContext requestContext) {

        log.debug("...start cleaning html code ....");

        // Only a null input short-circuits; any other input is cleaned.
        if (html == null) {
            return null;
        }

        final HtmlCleaner htmlCleaner = getHtmlCleaner();
        final TagNode rootNode = htmlCleaner.clean(html);

        //super.rewrite(html, node, requestContext);
        // NOTE(review): getInnerHtml has been observed to drop img elements
        // that are still present in rootNode; serializing rootNode with a
        // SimpleHtmlSerializer built from the cleaner's properties is a
        // reported alternative - confirm before switching.
        return htmlCleaner.getInnerHtml(rootNode);

    }

}

Result: The anchor tags are removed as expected, but the images are removed as well!
I read in the documentation that I should call super.rewrite(…) at the end, which I tried without success :frowning:

While debugging the code, I saw that right before the return statement the rootNode still contains the img tag. Where has it gone?

What I also do not understand: when I override rewrite without the Node node parameter, the code does not work at all - it is not even called.

After reading the htmlcleaner doc I found the following solution, which I would like to share:

 // Serialize rootNode with a SimpleHtmlSerializer built from the cleaner's
 // properties (presumably replacing the getInnerHtml(rootNode) return in
 // rewrite - confirm against the class above).
 SimpleHtmlSerializer simpleHtmlSerializer = new SimpleHtmlSerializer(cleaner.getProperties());
 return simpleHtmlSerializer.getAsString(rootNode);