# Request header override: present a desktop Firefox (Windows 10, x64) User-Agent string.
http_header(user-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/123.0

# July 2020 redesign

# Both the tidy and prune options are switched off for this site.
# NOTE(review): presumably because the XPath rules below already pick out the
# exact content nodes -- confirm before re-enabling either option.
tidy: no
prune: no

# Metadata and content selectors (XPath).
# title: the aria-label attribute of the <article data-area="article"> element.
title: //article[@data-area="article"]/@aria-label
# author / date: the content attribute of <meta name="author"> and <meta name="date">.
author: //meta[@name="author"]/@content
date: //meta[@name="date"]/@content
# body: union of two node sets --
#   1. any div whose class contains 'leading-loose' inside a <header> of the
#      article element (NOTE(review): presumably the intro/teaser text -- confirm);
#   2. the div marked data-article-el="body".
body: //article[@data-area="article"]//header//div[contains(@class, 'leading-loose')] | //div[@data-article-el="body"]

# Remove the subscription/paywall promotion block (div with data-area='paywall')
strip: //div[@data-area='paywall']

# Remove the embedded video player, which brings lots of nested tags with it:
# targets "jwplayer" as an id/class value and the div with data-component="JWPlayer".
strip_id_or_class: jwplayer
strip: //div[@data-component="JWPlayer"]

# Remove lazy-loaded images (old method -- disabled, kept for reference)
#replace_string(<noscript>): <!-- noscript -->
#replace_string(</noscript>): <!-- /noscript -->
#strip: //img[contains(concat(" ", normalize-space(@class), " "), " lazyload ")]

# New method:
#   1. rename every src="data... attribute to data-src-disabled="data...
#      (NOTE(review): presumably these are inline data: URI placeholders -- confirm);
#   2. rename data-src=" to src="
#      (NOTE(review): data-src presumably holds the real image URL -- confirm);
#   3. strip the first <noscript> sibling that follows each <picture> element.
replace_string(src="data): data-src-disabled="data
replace_string(data-src="): src="
strip: //picture/following-sibling::noscript[1]

# Remove assorted non-content elements: the two id/class values listed below,
# every <button> and <svg>, any element with data-app-hidden="true", and every <aside>.
# The data-advertisement rule is currently disabled (commented out).
strip_id_or_class: pointer-events-none
strip_id_or_class: border-separator-b
strip: //button
strip: //svg
strip: //*[@data-app-hidden="true"]
#strip: //*[@data-advertisement]
strip: //aside
# Remove the 'Mehr zum Thema' ("More on this topic") block: matches a div that is a
# direct child of a <section> inside the article body and whose child <span> text
# contains that heading. Note that the div is stripped, not the enclosing <section>.
strip: //div[@data-article-el="body"]//section/div[contains(./span/text(), 'Mehr zum Thema')]

# Remove pull quotes: divs inside the article body carrying the class token 'my-32'.
# The concat/normalize-space idiom matches 'my-32' only as a whole class name,
# not as a substring of another class.
strip: //div[@data-article-el="body"]//div[contains(concat(" ", normalize-space(@class), " "), " my-32 ")]

# Skip the JSON-LD metadata: the field that would supply the title always holds
# the article id (e.g. a-710880) instead of the title.
# NOTE(review): the original note reads "field name", presumably the JSON-LD
# `name` property -- confirm.
skip_json_ld: yes

# Test URLs. The test_contains line gives a sentence from the first article;
# the remaining three URLs are listed without a test_contains line.
# NOTE(review): the http:// URLs look like older spiegel.de URL schemes -- confirm they still resolve.
test_url: https://www.spiegel.de/politik/deutschland/barbara-borchardt-skandal-die-linke-grinst-sich-eins-kolumne-a-c1e59f4c-9bbd-42e2-a144-81b7115584ef
test_contains: Und sie kann es, weil sie keiner in die Schranken weist.
test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html
test_url: http://www.spiegel.de/wirtschaft/soziales/griechenland-was-den-griechischen-buergern-nun-droht-a-1042682.html
test_url: http://www.spiegel.de/spiegel/a-710880.html
