#!/bin/sh
#
# Tiny man fake using online manuals.
# Copyright (C) 2009-2015 SliTaz GNU/Linux.
#
. /lib/libtaz.sh

# Internationalization.
TEXTDOMAIN='slitaz-base'
. /etc/locale.conf
export TEXTDOMAIN LANG

# Render a local HTML document as plain text and page it with "less -M".
#   $1 - path of the HTML file to display
# If /usr/bin/retawq is executable it does the rendering (--dump); otherwise
# the markup is stripped by the sed/tr/fold pipeline below.
display_html() {
	if [ -x /usr/bin/retawq ]; then
		retawq --dump=file://"$1" | less -M
	else
		# Rationale: busybox "less" is unable to use control chars, so we're
		# stuck with regular text (no bold, colors...), and use other ways to
		# emphasize: quotes, underscores, brackets, tabs.
		# Explanation for following sequence:
		# 1) keep only what's in the HTML body
		# 2) make sure <pre> and </pre> are on a line of their own (see 3)
		# 3a) newlines must be obeyed between <pre> tags, so explicitly add <br>
		#     because newlines are going to be stripped; also remove <em> (added
		#     emphasis characters would be misleading)
		# 3b) the </pre> line is matched also, but should not get a <br> added,
		#     so remove it (let it on the <pre> line, starts a new paragraph)
		# 4) strip newlines, remove comments (should be done earlier; the match
		#    is greedy, so text between two comments is dropped as well)
		# 5) emphasize headers and titles with a Tab, add newline after
		# 6) add newline for <br> <p> </p> <section> <article[ id=...]>
		# 7) suppress <a [href=...]> </a> and some other tags
		# 8) add newline and hyphen for list items, newline for list end
		# 9) emphasize <tt> and <code> blocks by ‘’ ; <em> by _
		# 10) render x-details between brackets []
		# 11) render HTML-encoded chars; &amp; is decoded last so that an
		#     escaped entity such as "&amp;lt;" is not decoded twice
		# 12) remove start-of-line and end-of-line spaces
		# 13) wrap at word boundaries
		#
		# The multi-line sed scripts separate commands with plain newlines:
		# a backslash-newline inside the quotes is not portable across sed
		# implementations.
		sed '1,/<body>/d; /<\/body>/d' "$1" \
		| sed -r 's!(.)<(/?)pre>!\1\n<\2pre>!g; s!<(/?)pre>(.)!<\1pre>\n\2!g;' \
		| sed -r '/<pre>/,/<\/pre>/{s!$!<br>!g; s!</?em>!!g}
			/<\/pre>/s!<br>$!!' \
		| tr '\n' ' ' \
		| sed -r 's%<!-- .* -->%%g
			s!<(header|h[1-4])>!	!g; s!<footer>!\n	!
			s!</(header|h[1-4]|footer)>!\n!g
			s!<(br|/?p|section|article[^>]*)>!\n!g
			s!<(a [^>]*|/a|/section|/article|/html|/?pre|/?sup)>!!g
			s!<li>!\n — !g; s!<ul>!!g; s!</(li|ul)>!\n!g
			s!<(tt|code)>!‘!g; s!</(tt|code)>!’!g; s!</?em>!_!g
			s!<x-details>![!g; s!</x-details>!]!g
			s!&lt;!<!g; s!&gt;!>!g; s!&copy;!©!g; s!&quot;!"!g
			s!&reg;!®!g; s!&trade;!™!g; s!&amp;!\&!g' \
		| sed -r 's!^ +!!; s! +$!!' \
		| fold -s | less -M
	fi
}

# Without a topic, or when the first argument looks like an option, print
# the usage line and stop. "exit" is used rather than "return": returning
# from outside a function or sourced file is unspecified by POSIX and some
# shells keep running the script instead of stopping.
case "$1" in
	''|-*)
		emsg "$(_ '<b>Usage:</b> man [section] command')"
		exit ;;
esac

# Language code for localized lookups: the LC_MESSAGES value reported by
# "locale", cut at the first '"', '_' or '.'.
MAN_LANG=$(locale | sed -nr 's/LC_MESSAGES="?([^"_.]*).*"?/\1/p')

if [ -z "$2" ]; then
	# Single argument: the topic alone, to be searched in every section.
	SECTION='all'
	MAN_SECTION='[1-8]'
	MSG=''
else
	# Two arguments: the first one is the section, the topic follows.
	SECTION="$1"
	MAN_SECTION="$SECTION"
	MSG=" $(_n 'in section %s' "$SECTION")"
	shift
fi

# Whatever the form used above, the topic is now the first argument.
TOPIC="$1"

# Look for locally installed documentation about the topic; each lookup keeps
# only the first match and is tried only if the previous one found nothing.
# localized SliTaz doc?
DOC=$(find /usr/share/doc/"$TOPIC" /usr/share/doc/slitaz* \
	-name "$TOPIC.$MAN_LANG.html" 2>/dev/null | head -1)
# generic SliTaz doc?
[ -n "$DOC" ] || DOC=$(find /usr/share/doc/"$TOPIC" /usr/share/doc/slitaz* \
	-name "$TOPIC.html" 2>/dev/null | head -1)
# other doc?
[ -n "$DOC" ] || DOC=$(find /usr/share/doc \
	-name "$TOPIC.html" 2>/dev/null | head -1)

# Show what was found and stop. "exit" replaces "return" here because
# returning from outside a function is unspecified by POSIX.
if [ -n "$DOC" ]; then
	display_html "$DOC"
	exit
elif [ -f "/usr/share/doc/slitaz/$TOPIC.txt" ]; then
	# SliTaz tools/libraries documentation (man-alike format)
	less -M "/usr/share/doc/slitaz/$TOPIC.txt"
	exit
fi

# Look for a locally installed manpage, localized directories first and
# generic ones next. The directory arguments are left unquoted on purpose so
# that the language and section globs expand. Only the first hit is kept: the
# section glob and the several directories may yield more than one path, and
# the viewers below expect exactly one file.
MANPAGE=$(find /usr/share/man/$MAN_LANG*/man$MAN_SECTION \
	/usr/local/share/man/$MAN_LANG*/man$MAN_SECTION \
	/usr/local/man/$MAN_LANG*/man$MAN_SECTION \
	/usr/share/man/man$MAN_SECTION \
	/usr/local/share/man/man$MAN_SECTION \
	/usr/local/man/man$MAN_SECTION \
	-name "$TOPIC.$MAN_SECTION*" 2>/dev/null | head -1)
if [ -n "$MANPAGE" ]; then
	case "$MANPAGE" in
		*html) display_html "$MANPAGE"
			# "exit", not "return": we are not inside a function here.
			exit;;
	esac
	# "less"-ing a manpage is a BAD IDEA: man format is unreadable as is.
	# Use nroff if available; it outputs control chars, which busybox less
	# cannot handle: use "more" instead (or GNU less)
	if [ -x /usr/bin/nroff ]; then
		if [ "x$(readlink "$(which less)")" = x/bin/busybox ]; then
			VIEW_CMD="more"
		else
			VIEW_CMD="less -rM"
		fi
		# $VIEW_CMD is expanded unquoted so that "less -rM" splits into the
		# command and its option.
		case "$MANPAGE" in
			*gz) (zcat "$MANPAGE" || unlzma -c "$MANPAGE" 2>/dev/null) | \
				nroff -man | $VIEW_CMD;;
			*) nroff -man "$MANPAGE" | $VIEW_CMD;;
		esac
		exit
	else
		_ 'Found local manpage %s but no tool to display it.' "$MANPAGE"
		_ 'Consider installing groff by running: %s' \
			"su -c 'tazpkg get-install groff'"
		# do not quit, go on searching online
	fi
fi

# No section given: find out from the online alphabetical index which
# sections document the topic, and let the user choose if there are several.
if [ "$SECTION" = 'all' ]; then
	# Index pages are named after the lowercased first character of the
	# topic; digits and punctuation are mapped to "0", i.e. the "other" page.
	# '%.1s' extracts that character without a non-POSIX substring expansion
	# and without using the topic as a printf format string.
	LETTER=$(printf '%.1s' "$TOPIC" | tr 'A-Z[:punct:][:digit:]' a-z00)
	if [ "$LETTER" = '0' ]; then LETTER=other; fi
	# Escape regex metacharacters so that topics such as "g++" or "ld.so"
	# are matched literally in the index.
	TOPIC_RE=$(printf '%s' "$TOPIC" | sed 's/[][\\.^$*+?(){}|%]/\\&/g')
	SECTIONS=$(wget -q -O - "http://linux.die.net/man/$LETTER.html" | \
		sed -n -r "s%.*href=\"(.)/$TOPIC_RE\".*%\1%p")
	[ -n "$SECTIONS" ] || { _ 'No manual entry for %s' "$TOPIC"; exit 0;}
	if [ "$(printf '%s\n' "$SECTIONS" | wc -l)" -eq 1 ]; then
		SECTION=$SECTIONS
	else
		_n '%s found in the following sections:\n%s\nPlease choose one: ' \
			"$TOPIC" "$SECTIONS"
		read -r SECTION
	fi
fi

# Address of the online manual page for the requested section and topic.
MANURL="http://linux.die.net/man/$SECTION/$TOPIC"

# Probe the page first (nothing is downloaded); give up when it is missing.
wget -q --spider "$MANURL" 2>/dev/null || {
	_ 'No manual entry for %s' "$TOPIC$MSG"
	exit 0
}

if [ ! -x /usr/bin/retawq ]; then
	# No text browser at hand: have the page converted to text by the W3C
	# html2txt service, then drop the first two lines, the bracketed link
	# numbers, the "[INS: :INS]" lines and everything from "Site Search" on.
	URL="http://www.w3.org/services/html2txt?url=$MANURL&noinlinerefs=on&nonums=on"
	wget -q -O - "$URL" \
		| tail -n+3 \
		| sed 's!\[[0-9]*\]!!g;/\[INS: :INS\]/d;/^   Site Search$/,$d' \
		| less -M
else
	# Browse the page directly with retawq.
	retawq "$MANURL"
fi

exit 0
