#!/bin/sh
#
# compressor - module of the SliTaz Cook
# Copyright (C) SliTaz GNU/Linux - GNU GPL v3
#

. /usr/lib/slitaz/libcook.sh


# Compressor cache stuff

# Scratch file: query_cache() appends one '+' (hit) or '-' (miss) line per lookup
cache_stat="$(mktemp)"

# Root of the persistent cache; query_cache() uses one sub-directory per cache section
comp_cache_root="/home/slitaz/cache/cook"
mkdir -p "${comp_cache_root}"

# Cache notes.
# Do not do the same job twice. Getting a file from the cache is much faster
# than compressing it one more time. In addition, the cache tries not to take
# extra space by using hard links. However, cache files that are no longer
# referenced from anywhere else should be removed periodically.




#
# Functions
#


# Working with time (with hundredths precision)

get_time() {
	# Print the system uptime in seconds (with hundredths); used as a clock
	# for measuring elapsed time.
	# The first field of /proc/uptime is the uptime. The second field is the
	# idle time accumulated by all CPUs, which does not advance like a clock.
	cut -d' ' -f1 /proc/uptime
}

calc_time() {
	# Print the time elapsed since the moment $1 (a value printed by get_time).
	# From 30 seconds on, the rounded number of minutes is appended.
	# L10n: 's' is for seconds, 'm' is for minutes
	awk -v start="$1" -v now="$(get_time)" -v sec="$(_ 's')" -v min="$(_ 'm')" '
	BEGIN {
		elapsed = now - start
		if (elapsed >= 30) {
			printf("%.2f%s ~ %.0f%s\n", elapsed, sec, elapsed / 60, min)
			exit
		}
		printf("%.2f%s\n", elapsed, sec)
	}'
}


# Compressor mini summary

comp_summary() {
	# Print the one-line statistics of a finished compressor.
	# $1: start time (as printed by get_time)
	# $2: total size before, bytes; $3: total size after, bytes
	# 'status', '_' and '_n' are not defined in this file (libcook.sh is sourced at the top).
	status
	if [ "$2" -eq 0 ]; then
		# Nothing was measured, so there is nothing to report
		return
	fi

	saving=$(awk -v before="$2" -v after="$3" 'BEGIN { printf("%.0f\n", (before - after) / 1024) }')

	# The statistics file is non-empty only if query_cache ran since the last summary;
	# it is emptied here so the next compressor starts counting from zero
	cache_msg=''
	if [ -s "$cache_stat" ]; then
		cache_msg=$(_n ' Cache hit: %d/%d.' "$(grep -cF '+' $cache_stat)" "$(wc -l < $cache_stat)")
		: > $cache_stat
	fi

	_ '  Time: %s. Size: %s B -> %s B. Save: %s KB.%s' \
		"$(calc_time $1)" \
		"$2" "$3" \
		"$saving" \
		"$cache_msg"
}


# Calculating different sizes

sizes() {
	# Print the total size in bytes of the files of category $1.
	# An empty line is printed when nothing matches or the category is unknown.

	# Turn the category into the argument list for find
	case $1 in
		man) set -- $install/usr/share/man          -type f ;;
		png) set -- $install                        -type f -name '*.png' ;;
		svg) set -- $install                        -type f -name '*.svg' ;;
		xml) set -- $install                        -type f '(' -name '*.ui' -o -name '*.glade' ')' ;;
		des) set -- $install                        -type f -name '*.desktop' ;;
		mo1) set -- $install                        -type f -name '*.mo' ;;
		loc) set -- $install/usr/share/i18n/locales -type f ;;
		mo2) set -- $fs/usr/share/locale            -type f -name '*.mo' ;;
		gz)  set -- $install                        -type f -name '*.gz' ! -path '*/share/man/*' ;;
		*)   set -- ;;
	esac

	# The fifth column of 'ls -l' is the file size
	if [ $# -gt 0 ]; then
		find "$@" -exec ls -l '{}' ';'
	fi | awk '{ total += $5 } END { print total }'
}


# Query cache for already existing compressed file; substitute original file with the cached
# compressed one if so.
# $1: cache section (gz, mangz, png, etc.); $2: path to original file

query_cache() {
	# Always prints the path of the cache entry for file $2 in section $1;
	# the entry name is the MD5 sum of the original file.
	# Returns 0 on a cache hit (file $2 has been replaced by the cached one),
	# non-zero on a miss. Every lookup is recorded in $cache_stat.
	md5=$(md5sum "$2")
	cachefile="$comp_cache_root/$1/${md5%% *}"
	echo "$cachefile"

	# Prefer a hard link (takes no extra space); fall back to a copy when the
	# link cannot be made, e.g. when the cache is on another file system.
	# Report a hit only if the file really has been substituted.
	if [ -f "$cachefile" ] &&
	   { ln -f "$cachefile" "$2" 2>/dev/null || cp -f "$cachefile" "$2"; }; then
		echo '+' >> "$cache_stat"
	else
		echo '-' >> "$cache_stat"
		false
	fi
}


# Store compressed file to the cache
# $1: path to cache entry to be stored; $2: path to compressed file to be stored

store_cache() {
	# Make the compressed file $2 available in the cache under the path $1,
	# creating the cache section directory when needed.
	mkdir -p "${1%/*}"

	# Link the file into the cache instead of moving it away and linking it
	# back: the package file stays in place whatever happens. Fall back to a
	# copy when a hard link cannot be made (e.g. across file systems).
	ln -f "$2" "$1" 2>/dev/null || cp -f "$2" "$1"
}


# Function to compress all man pages
# Compressing can be disabled with COOKOPTS="!manz"

compress_manpages() {
	# Convert every man page under $install/usr/share/man to gzip, repair the
	# symlinks pointing to renamed pages, then recompress the pages with advdef
	# (through the "mangz" cache section) and print the summary.
	local saved_ifs

	time0=$(get_time)
	[ "${COOKOPTS/!manz/}" != "$COOKOPTS" ] && return
	manpath="$install/usr/share/man"
	[ -d "$manpath" ] || return
	size0=$(sizes man); [ -z "$size0" ] && return

	tazpkg -gi advancecomp --quiet --cookmode

	action 'Compressing man pages...'

	# We'll use only Gzip compression, so decompress other formats first
	find "$manpath" -type f -name '*.bz2' -exec bunzip2 '{}' \;
	find "$manpath" -type f -name '*.xz'  -exec unxz    '{}' \;

	# Fast compress with gzip
	find "$manpath" -type f ! -name '*.gz' -exec gzip '{}' \;

	# Split the lists printed by find on newlines only, so that names with
	# spaces survive; the previous IFS is put back after the loops
	saved_ifs=$IFS
	IFS=$'\n'

	# Fix symlinks
	for i in $(find "$manpath" -type l); do
		# A link to a directory needs no fixing (it would become dangling)
		[ -d "$i" ] && continue
		dest=$(readlink "$i" | sed 's|\.[gbx]z2*$||')
		link=$(printf '%s\n' "$i" | sed 's|\.[gbx]z2*$||')
		rm "$i"; ln -s "$dest.gz" "$link.gz"
	done

	# Recompress with advdef (it can't compress, only recompress)
	for i in $(find "$manpath" -type f); do
		if ! cached_path=$(query_cache mangz "$i"); then
			advdef -z4q "$i"
			store_cache "$cached_path" "$i"
		fi
	done

	IFS=$saved_ifs

	comp_summary "$time0" "$size0" "$(sizes man)"
}


# Function to recompress all gzip archives
# Recompressing can be disabled with COOKOPTS="!gz"

recompress_gz() {
	# Recompress with advdef every *.gz file found in $install, except the man
	# pages (they are handled by compress_manpages). Results go through the
	# "gz" cache section. Prints the summary at the end.
	local saved_ifs

	time0=$(get_time)
	[ "${COOKOPTS/!gz/}" != "$COOKOPTS" ] && return
	size0=$(sizes gz); [ -z "$size0" ] && return

	tazpkg -gi advancecomp --quiet --cookmode

	action 'Recompressing gzip files...'

	# Split the list printed by find on newlines only; the previous IFS is put
	# back afterwards so that the rest of the script is not affected
	saved_ifs=$IFS
	IFS=$'\n'

	# Recompress with advdef
	for i in $(find "$install" -type f -name '*.gz' ! -path '*/share/man/*'); do
		if ! cached_path=$(query_cache gz "$i"); then
			advdef -z4q "$i"
			store_cache "$cached_path" "$i"
		fi
	done

	IFS=$saved_ifs

	comp_summary "$time0" "$size0" "$(sizes gz)"
}


# Function used after compile_rules() to compress all png images
# Compressing can be disabled with COOKOPTS="!pngz"

compress_png() {
	# Compress every *.png file found in $install with pngquant and/or optipng.
	# COOKOPTS: "!pngquant" and "!optipng" switch off the respective tool;
	# "op0".."op8" select the optipng level (default is 2).
	# The cache section name encodes the level and the tools in use.
	local saved_ifs

	time0=$(get_time)
	[ "${COOKOPTS/!pngz/}" != "$COOKOPTS" ] && return
	size0=$(sizes png); [ -z "$size0" ] && return

	use_pq=true
	use_op=true
	[ "${COOKOPTS/!pngquant/}" != "$COOKOPTS" ] && use_pq=false
	[ "${COOKOPTS/!optipng/}"  != "$COOKOPTS" ] && use_op=false
	$use_pq && tazpkg -gi pngquant --quiet --cookmode
	$use_op && tazpkg -gi optipng  --quiet --cookmode

	action 'Compressing png images...'

	oplevel=$(echo $COOKOPTS | grep 'op[0-8]' | sed 's|.*op\([0-8]\).*|\1|')
	[ -z "$oplevel" ] && oplevel='2'

	cache_section="png$oplevel"
	$use_pq && cache_section="${cache_section}p"
	$use_op && cache_section="${cache_section}o"

	# Split the list printed by find on newlines only; the previous IFS is put
	# back afterwards so that the rest of the script is not affected
	saved_ifs=$IFS
	IFS=$'\n'

	for i in $(find "$install" -type f -name '*.png'); do
		if ! cached_path=$(query_cache "$cache_section" "$i"); then
			$use_pq && pngquant -f --skip-if-larger --ext .png --speed 1 "$i"
			if $use_op; then
				if [ "$oplevel" = '8' ]; then
					# Level 8 stands for "-o7 -zm1-9". The options are spelled out
					# as separate words here: an unquoted "-o$oplevel" would not be
					# split at the space while IFS is a newline.
					optipng -quiet -strip all -o7 -zm1-9 "$i"
				else
					optipng -quiet -strip all "-o$oplevel" "$i"
				fi
			fi
			store_cache "$cached_path" "$i"
		fi
	done

	IFS=$saved_ifs

	comp_summary "$time0" "$size0" "$(sizes png)"
}


# Function used after compile_rules() to compress all svg images
# Compressing can be disabled with COOKOPTS="!svgz"

compress_svg() {
	# Run svgcleaner in place over every *.svg file found in $install, print
	# the summary and then the messages svgcleaner produced, if any.
	local saved_ifs

	time0=$(get_time)
	[ "${COOKOPTS/!svgz/}" != "$COOKOPTS" ] && return
	size0=$(sizes svg); [ -z "$size0" ] && return

	tazpkg -gi svgcleaner --quiet --cookmode

	action 'Compressing svg images...'

	cleaner_log="$(mktemp)"

	# Split the list printed by find on newlines only; the previous IFS is put
	# back afterwards so that the rest of the script is not affected
	saved_ifs=$IFS
	IFS=$'\n'
	for i in $(find "$install" -type f -name '*.svg'); do
		# Each log line starts with the file name, followed by the cleaner output
		printf '%s: ' "$i" >> "$cleaner_log"
		svgcleaner "$i" "$i" --remove-unresolved-classes false --quiet true >> "$cleaner_log"
	done
	IFS=$saved_ifs

	comp_summary "$time0" "$size0" "$(sizes svg)"

	# Drop the lines holding nothing but the file name (no message for that file)
	sed -i '/: $/d' "$cleaner_log"
	if [ -s "$cleaner_log" ]; then
		_ 'Cleaner warnings and errors:'
		awk '{printf "  %s\n", $0;}' "$cleaner_log"
		echo
	fi
	rm "$cleaner_log"
}


# Function used after compile_rules() to shrink all *.ui and *.glade files:
# remove insignificant spaces and comments
# Compressing can be disabled with COOKOPTS="!uiz"

compress_ui() {
	# Rewrite every *.ui and *.glade file found in $install through xmlstarlet
	# (canonical form without comments, then an unindented copy) and print the
	# summary. A file is left untouched when xmlstarlet produces no output for it.
	local saved_ifs

	[ "${COOKOPTS/!uiz/}" != "$COOKOPTS" ] && return
	[ -z "$(find "$install" -type f \( -name '*.ui' -o -name '*.glade' \) )" ] && return

	tazpkg -gi xmlstarlet --quiet --cookmode

	action 'Compressing ui files...'

	size0=$(sizes xml)
	time0=$(get_time)
	temp_ui="$(mktemp)"

	# Split the list printed by find on newlines only; the previous IFS is put
	# back afterwards so that the rest of the script is not affected
	saved_ifs=$IFS
	IFS=$'\n'
	for ui in $(find "$install" -type f \( -name '*.ui' -o -name '*.glade' \) ); do
		# Overwrite the original only with a real result: an empty one means
		# the conversion failed and would wipe out the file.
		# 'cat >' (not 'mv') keeps the inode and the permissions of the original.
		if xmlstarlet c14n --without-comments "$ui" | xmlstarlet sel -B -t -c '*' > "$temp_ui" &&
		   [ -s "$temp_ui" ]; then
			cat "$temp_ui" > "$ui"
		fi
	done
	IFS=$saved_ifs

	comp_summary "$time0" "$size0" "$(sizes xml)"
	rm "$temp_ui"
}


# Get list of supported locales...

get_supported_locales() {
	# Print the list of locales: the value of LOCALE_PACK set by locale-pack.conf
	# (taken from the local wok, else downloaded), else a built-in list.
	lpc='/slitaz-i18n/stuff/locale-pack.conf'
	if [ ! -e "$WOK$lpc" ]; then
		# No package in the local wok: try to get the file from Hg
		temp_conf=$(mktemp)
		wget -q -O $temp_conf -T 10 "http://hg.slitaz.org/wok/raw-file/tip$lpc"
		if [ ! -s $temp_conf ]; then
			# Nothing downloaded: give up and use the hardcoded list
			LOCALE_PACK="ar ca cs da de el en es fi fr hr hu id is it ja nb nl nn pl pt \
			pt_BR ro ru sl sv tr uk zh_CN zh_TW"
		else
			. $temp_conf
		fi
		rm $temp_conf
	else
		# The package is in the local wok
		. "$WOK$lpc"
	fi
	echo $LOCALE_PACK
}


# Fix common errors and warnings in the .desktop files
# Fixing can be disabled with COOKOPTS="!fixdesktops"

fix_desktop_files() {
	# Process every *.desktop file found in $install/usr/share/applications:
	# sort it, fix the common errors, strip the unsupported locales (unless
	# COOKOPTS has "!i18nz") and extra-strip it (unless COOKOPTS has
	# "!extradesktops"). With non-empty $QA the changes and the validator
	# output are shown for every file. Prints the summary at the end.
	local saved_ifs

	[ "${COOKOPTS/!fixdesktops/}" != "$COOKOPTS" ] && return
	deskpath="$install/usr/share/applications"
	[ -d "$deskpath" ] || return
	[ -z "$(find "$deskpath" -type f -name '*.desktop')" ] && return

	size0=$(sizes des)
	time0=$(get_time)

	# The validator is needed only for the QA report
	if [ -n "$QA" ] && ! command -v desktop-file-validate >/dev/null 2>&1; then
		tazpkg -gi desktop-file-utils-extra --quiet --cookmode
	fi

	# The variable $LOCALE is set in cook.conf and may be overridden in the receipt.
	# Default value is "" (empty). That means for us that we'll use the full
	# list of supported locales here.
	[ -z "$LOCALE" ] && LOCALE=$(get_supported_locales)

	# Split the list printed by find on newlines only; the previous IFS is put
	# back afterwards so that the rest of the script is not affected
	saved_ifs=$IFS
	IFS=$'\n'
	for desktop in $(find "$deskpath" -type f -name '*.desktop'); do
		# Keep the untouched copy for the QA diff
		cp "$desktop" "$desktop.orig"

		# Sort out .desktop file (is prerequisite to correct working of `fix-desktop-file`)
		sdft "$desktop" -i

		# Fix common errors in .desktop file
		fix-desktop-file "$desktop"

		# Strip unsupported locales from .desktop file
		if [ "${COOKOPTS/!i18nz/}" = "$COOKOPTS" ]; then
			sdft "$desktop" -i -k "$LOCALE"
		fi

		# Extra-strip
		if [ "${COOKOPTS/!extradesktops/}" = "$COOKOPTS" ]; then
			sdft "$desktop" -i -g -x -tf -r 'Keywords*' -o
		fi

		if [ -n "$QA" ]; then
			# Check the rest of errors, warnings and tips
			_ 'QA: Checking %s...' "$(basename "$desktop")"
			diff "$desktop.orig" "$desktop"
			desktop-file-validate "$desktop" | busybox fold -s
			echo
		fi

		rm "$desktop.orig"
	done
	IFS=$saved_ifs

	comp_summary "$time0" "$size0" "$(sizes des)"
}


# Normalize all *.mo files: unconditionally convert to UTF-8; remove strings that are not really necessary
# to the translation (msgid = msgstr)
# Normalization can be disabled with COOKOPTS="!monorm"

normalize_mo() {
	# Rebuild every *.mo file found in $install: the catalog is decompiled,
	# converted to UTF-8, filtered by the awk program below and compiled again.
	# The original file is replaced only if the new one is not empty.
	[ "${COOKOPTS/!monorm/}" != "$COOKOPTS" ] && return
	[ -z "$(find $install -type f -name '*.mo')" ] && return

	# Gettext functions: msgunfmt, msguniq, msgconv, msgfmt
	tazpkg -gi gettext      --quiet --cookmode
	# Gconv modules (convert to UTF-8)
	tazpkg -gi glibc-locale --quiet --cookmode

	action 'Normalizing mo files...'

	size0=$(sizes mo1)
	time0=$(get_time)

	# Process all existing *.mo files
	# (the list printed by find is split on newlines only; note that IFS is not
	# restored after the loop)
	IFS=$'\n'
	for mo in $(find "$install" -type f -name '*.mo'); do
		# Base name for the temporary files of this catalog:
		# $tmpfile (decompiled text), $tmpfile.pf (Plural-Forms line),
		# $tmpfile.awk (filtered text), $tmpfile.mo (compiled result)
		tmpfile="$(mktemp)"

		# Decompile the catalog and write it out in UTF-8
		msgunfmt "$mo" | msguniq | msgconv -o "$tmpfile" -t 'UTF-8'
		# add newline
		# (the awk programs below act on an entry when they reach an empty line)
		echo >> "$tmpfile"

		# get Plural-Forms
		# The awk program joins the quoted strings of the first paragraph (the
		# header entry) into one line; sed then breaks it at the literal "\n"
		# sequences and grep keeps the Plural-Forms line only.
		awk '
		BEGIN { skip = ""; }
		{
			if (! skip) {
				s = $0;
				gsub(/^[^\"]*\"/, "", s);
				gsub(/\"$/, "", s);
				printf("%s", s);
			}
			if (! $0) skip = "yes";
		}
		' "$tmpfile" | sed 's|\\n|\n|g' | grep "^Plural-Forms:" > "$tmpfile.pf"

		# Without plural messages the Plural-Forms line is not needed:
		# blank it, so the variable pf of the main program becomes empty
		if ! grep -q 'msgid_plural' "$tmpfile"; then
			echo > "$tmpfile.pf"
		fi

		# main
		# Writes a new header (charset, plus Plural-Forms when pf is not empty),
		# skips the original header paragraph, then collects every entry up to
		# the empty line that ends it and prints the entry only if its msgid
		# differs from its msgstr.
		awk -v pf="$(cat "$tmpfile.pf")" '
		function clean() {
			mode = msgctxt = msgid = msgid_plural = msgstr = msgstr0 = msgstr1 = msgstr2 = msgstr3 = msgstr4 = msgstr5 = "";
		}

		function getstring() {
			# Skip unquoted words at the beginning (msgid, msgstr...) and get string from inside quotes
			s = $0;
			gsub(/^[^\"]*\"/, "", s);
			gsub(/\"$/, "", s);
			return s;
		}

		BEGIN {
			printf("msgid \"\"\nmsgstr \"\"\n\"Content-Type: text/plain; charset=UTF-8\\n\"\n");
			if (pf)
				printf("\"%s\\n\"\n", pf);
			printf("\n");
			skip = 1;
			clean();
		}

		{
			# Skip the entire header
			if (!skip) {
				if ($1 == "msgctxt" || $1 == "msgid" || $1 == "msgstr" || $1 == "msgid_plural")
					mode = $1;
				if ($1 == "msgstr[0]") mode = "msgstr0";
				if ($1 == "msgstr[1]") mode = "msgstr1";
				if ($1 == "msgstr[2]") mode = "msgstr2";
				if ($1 == "msgstr[3]") mode = "msgstr3";
				if ($1 == "msgstr[4]") mode = "msgstr4";
				if ($1 == "msgstr[5]") mode = "msgstr5";

				if (mode == "msgctxt")      msgctxt      = msgctxt      getstring();
				if (mode == "msgid")        msgid        = msgid        getstring();
				if (mode == "msgstr")       msgstr       = msgstr       getstring();
				if (mode == "msgid_plural") msgid_plural = msgid_plural getstring();
				if (mode == "msgstr0")      msgstr0      = msgstr0      getstring();
				if (mode == "msgstr1")      msgstr1      = msgstr1      getstring();
				if (mode == "msgstr2")      msgstr2      = msgstr2      getstring();
				if (mode == "msgstr3")      msgstr3      = msgstr3      getstring();
				if (mode == "msgstr4")      msgstr4      = msgstr4      getstring();
				if (mode == "msgstr5")      msgstr5      = msgstr5      getstring();

				if (! $0) {
					if (msgid != msgstr) {
						if (msgctxt)      printf("msgctxt \"%s\"\n",      msgctxt);
						                  printf("msgid \"%s\"\n",        msgid);
						if (msgid_plural) printf("msgid_plural \"%s\"\n", msgid_plural);
						if (msgstr)       printf("msgstr \"%s\"\n",       msgstr);
						if (msgstr0)      printf("msgstr[0] \"%s\"\n",    msgstr0);
						if (msgstr1)      printf("msgstr[1] \"%s\"\n",    msgstr1);
						if (msgstr2)      printf("msgstr[2] \"%s\"\n",    msgstr2);
						if (msgstr3)      printf("msgstr[3] \"%s\"\n",    msgstr3);
						if (msgstr4)      printf("msgstr[4] \"%s\"\n",    msgstr4);
						if (msgstr5)      printf("msgstr[5] \"%s\"\n",    msgstr5);
						                  printf("\n");
					}
					clean();
				}
			}
			if ($0 == "") skip = "";
		}
		' "$tmpfile" > "$tmpfile.awk"

		# Compile the filtered text back to the binary catalog
		msgfmt "$tmpfile.awk" -o "$tmpfile.mo"

		# Replace the original only with a non-empty result; otherwise report
		# the error and keep the original file
		if [ -s "$tmpfile.mo" ]; then
			rm "$mo"; mv "$tmpfile.mo" "$mo"
		else
			_ 'Error processing %s' "$mo"
			[ -e "$tmpfile.mo" ] && rm "$tmpfile.mo"
		fi

		# Clean
		rm "$tmpfile" "$tmpfile.pf" "$tmpfile.awk"
	done

	comp_summary "$time0" "$size0" "$(sizes mo1)"
}


# Strip locale definitions: normalize whitespace and remove comments
# Stripping can be disabled with COOKOPTS="!locdef"

strip_locale_def() {
	# Edit in place every file under $install/usr/share/i18n/locales:
	# tabs become spaces, runs of spaces are squeezed, leading space is removed
	# and the lines starting with '%' are deleted. Prints the summary at the end.
	if [ "${COOKOPTS/!locdef/}" != "$COOKOPTS" ]; then
		return
	fi
	if [ ! -d "$install/usr/share/i18n/locales" ]; then
		return
	fi
	if [ -z "$(find $install/usr/share/i18n/locales -type f)" ]; then
		return
	fi

	action 'Stripping locale definitions...'
	size0=$(sizes loc)
	time0=$(get_time)

	for i in $(find $install/usr/share/i18n/locales -type f); do
		sed -i \
			-e 's|	| |g' \
			-e 's|  *| |g' \
			-e 's|^ ||' \
			-e '/^%/d' \
			$i
	done

	comp_summary "$time0" "$size0" "$(sizes loc)"
}


# Find and strip: --strip-all (-s) or --strip-debug on static libs as well
# as removing unneeded files like in Python packages. Cross compiled binaries
# must be stripped with cross-tools aka $ARCH-slitaz-*-strip
# Stripping can be disabled with COOKOPTS="!strip"

strip_package() {
	# Strip the executables and the libraries found in $fs, delete the Python
	# byte-code and the Perl bookkeeping files, then print the summary.
	# The strip command is "$HOST_SYSTEM-strip" for the arm* and x86_64 values
	# of $ARCH and plain "strip" otherwise; it is exported as $STRIP.
	[ "${COOKOPTS/!strip/}" != "$COOKOPTS" ] && return

	case "$ARCH" in
		arm*|x86_64) export STRIP="$HOST_SYSTEM-strip" ;;
		*)           export STRIP='strip' ;;
	esac
	action 'Executing strip on all files...'
	size0=0
	size1=0
	time0=$(get_time)

	# Strip executable files
	for dir in "$fs/bin" "$fs/sbin" "$fs/usr/bin" "$fs/usr/sbin" "$fs/usr/games"; do
		if [ -d "$dir" ]; then
			oldsize=$(find "$dir" -type f -exec ls -l '{}' \; | awk '{s+=$5}END{print s}')
			find "$dir" -type f -exec "$STRIP" -s '{}' 2>/dev/null \;
			newsize=$(find "$dir" -type f -exec ls -l '{}' \; | awk '{s+=$5}END{print s}')
			# An empty value (no files) counts as 0 in the arithmetic expansion
			size0=$((size0 + oldsize)); size1=$((size1 + newsize))
		fi
	done

	# Strip shared and static libraries
	# Remove Python *.pyc and *.pyo, Perl perllocal.pod and .packlist
	oldsize=$(find "$fs" -type f \( \
		-name '*.so*' -o -name '*.a' -o \
		-name '*.pyc' -o -name '*.pyo' -o \
		-name 'perllocal.pod' -o -name '.packlist' \) -exec ls -l '{}' \; | awk '{s+=$5}END{print s}')

	find "$fs" -name '*.so*' -exec "$STRIP" -s '{}' 2>/dev/null \;
	find "$fs" -name '*.a' -exec "$STRIP" --strip-debug '{}' 2>/dev/null \;
	find "$fs" -type f \( -name '*.pyc' -o -name '*.pyo' \) -delete 2>/dev/null
	find "$fs" -type f \( -name 'perllocal.pod' -o -name '.packlist' \) -delete 2>/dev/null

	# Only the libraries are left to measure: the other files are deleted now.
	# (No operator may precede the closing parenthesis: find rejects such an
	# expression and the new size would be counted as zero.)
	newsize=$(find "$fs" -type f \( \
		-name '*.so*' -o -name '*.a' \) -exec ls -l '{}' \; | awk '{s+=$5}END{print s}')

	comp_summary "$time0" "$((size0 + oldsize))" "$((size1 + newsize))"
}


# Strip unsupported locales (.mo files)

strip_mo_i18n() {
	# Remove from $fs/usr/share/locale the directories of the locales that are
	# not listed in $LOCALE (or in the list of supported locales when $LOCALE
	# is empty). Can be disabled with COOKOPTS="!i18nz". Prints the summary.
	if [ "${COOKOPTS/!i18nz/}" != "$COOKOPTS" ]; then
		return
	fi

	if [ ! -d "$fs/usr/share/locale" ]; then
		return
	fi
	if [ -z "$(find $fs/usr/share/locale -type f -name '*.mo')" ]; then
		return
	fi

	action 'Thin out translation files...'
	size0=$(sizes mo2)
	time0=$(get_time)

	# The variable $LOCALE is set in cook.conf and may be overridden in the receipt.
	# Default value is "" (empty). That means for us that we'll use the full
	# list of supported locales here.
	if [ -z "$LOCALE" ]; then
		LOCALE=$(get_supported_locales)
	fi

	# Existing locales, as one space separated line
	elocales=" $(ls -1 "$fs/usr/share/locale" | tr '\n' ' ') "

	# Cut every locale to keep out of that line (as a plain substring). What is
	# left are the locales to delete plus the garbage like '_AU', '_US', '_BR'
	# remaining from 'en_AU', 'en_US', 'pt_BR'...
	for keep_locale in $LOCALE; do
		elocales=${elocales//$keep_locale}
	done

	# Remove the unsupported locales; the garbage names match no directory
	for rem_locale in $elocales; do
		if [ -d "$fs/usr/share/locale/$rem_locale" ]; then
			rm -r "$fs/usr/share/locale/$rem_locale"
		fi
	done

	comp_summary "$time0" "$size0" "$(sizes mo2)"
}




# Entry point: the first argument selects the tree to work on
if [ "$1" = 'install' ]; then
	# Compressors working in the $install
	# (these five are skipped when $ARCH starts with "arm")
	if [ "${ARCH#arm}" = "$ARCH" ]; then
		recompress_gz
		compress_manpages
		compress_png
		compress_svg
		compress_ui
	fi

	fix_desktop_files
	normalize_mo
	strip_locale_def
elif [ "$1" = 'fs' ]; then
	# Compressors working in the $fs
	strip_package
	strip_mo_i18n
fi

# Clean
rm -f -- "$cache_stat"
# Delete the cache entries that are no longer hard-linked from anywhere else
# (link count below 2). The root is quoted and checked first: without a
# starting point GNU find would work on the current directory.
if [ -n "$comp_cache_root" ] && [ -d "$comp_cache_root" ]; then
	find "$comp_cache_root" -type f -links -2 -delete
fi
