# See the file "COPYING" in the main distribution directory for copyright.
#------------------------------------------------------------------------------
# $File: msdos,v 1.84 2013/02/05 13:55:22 christos Exp $
# msdos:  file(1) magic for MS-DOS files
#

# .BAT files (Daniel Quinlan, quinlan@yggdrasil.com)
# updated by Joerg Jenderek at Oct 2008,Apr 2011
# A leading "@" (tested as text because of /t) marks the candidate; the
# level-1 tests then look for a typical first command directly after it.
# Per magic(5), /c lets lower-case pattern letters match either case and
# /W requires every blank in the pattern to match at least one blank.
0	string/t	@			
# "@ echo off" (blank between the "@" and the command)
>1	string/cW	\ echo\ off	DOS batch file text
!:mime	text/x-msdos-batch
# "@echo off"
>1	string/cW	echo\ off	DOS batch file text
!:mime	text/x-msdos-batch
# "@rem"
>1	string/cW	rem		DOS batch file text
!:mime	text/x-msdos-batch
# "@set " (the trailing blank is part of the pattern)
>1	string/cW	set\ 		DOS batch file text
!:mime	text/x-msdos-batch

# Tests for various EXE types.
#
# Many of the compressed formats were extracted from IDARC 1.23 source code.
#
# Top-level entry: the two-byte "MZ" signature at offset 0.  Every following
# continuation line (">", ">>", ...) refines this single entry.
# NOTE(review): the description "DOS MZ" on this line is printed in front of
# every more specific description below -- confirm that prefix is intended.
0	string/b	MZ DOS MZ
!:mime	application/x-dosexec
# All non-DOS EXE extensions have the relocation table more than 0x40 bytes into the file.
>0x18	leshort <0x40 MS-DOS executable
# These traditional tests usually work but not always.  When test quality support is
# implemented these can be turned on.
#>>0x18	leshort	0x1c	(Borland compiler)
#>>0x18	leshort	0x1e	(MS compiler)

# If the relocation table is 0x40 or more bytes into the file, it's definitely
# not a DOS EXE.
# This test prints nothing itself; it only gates the ">>" tests that follow it
# (up to the next ">" line).
>0x18  leshort >0x3f

# Maybe it's a PE?
# The little-endian long at 0x3c is used as the offset of the extended header;
# all "(0x3c.l+N)" offsets below are relative to that header.
>>(0x3c.l) string PE\0\0 PE
# Header+24: magic word distinguishing PE32, PE32+ and ROM images.
>>>(0x3c.l+24)	leshort		0x010b	\b32 executable
>>>(0x3c.l+24)	leshort		0x020b	\b32+ executable
>>>(0x3c.l+24)	leshort		0x0107	ROM image
>>>(0x3c.l+24)	default		x	Unknown PE signature
>>>>&0 		leshort		x	0x%x
# Header+22: flag word; bit 0x2000 is reported as DLL.
>>>(0x3c.l+22)	leshort&0x2000	>0	(DLL)
# Header+92: subsystem code.
>>>(0x3c.l+92)	leshort		1	(native)
>>>(0x3c.l+92)	leshort		2	(GUI)
>>>(0x3c.l+92)	leshort		3	(console)
>>>(0x3c.l+92)	leshort		7	(POSIX)
>>>(0x3c.l+92)	leshort		9	(Windows CE)
>>>(0x3c.l+92)	leshort		10	(EFI application)
>>>(0x3c.l+92)	leshort		11	(EFI boot service driver)
>>>(0x3c.l+92)	leshort		12	(EFI runtime driver)
>>>(0x3c.l+92)	leshort		13	(EFI ROM)
>>>(0x3c.l+92)	leshort		14	(XBOX)
>>>(0x3c.l+92)	leshort		15	(Windows boot application)
>>>(0x3c.l+92)	default		x	(Unknown subsystem
>>>>&0		leshort		x	0x%x)
# Header+4: target machine code.
>>>(0x3c.l+4)	leshort		0x14c	Intel 80386
>>>(0x3c.l+4)	leshort		0x166	MIPS R4000
>>>(0x3c.l+4)	leshort		0x168	MIPS R10000
>>>(0x3c.l+4)	leshort		0x184	Alpha
>>>(0x3c.l+4)	leshort		0x1a2	Hitachi SH3
>>>(0x3c.l+4)	leshort		0x1a6	Hitachi SH4
>>>(0x3c.l+4)	leshort		0x1c0	ARM
>>>(0x3c.l+4)	leshort		0x1c2	ARM Thumb
>>>(0x3c.l+4)	leshort		0x1c4	ARMv7 Thumb
>>>(0x3c.l+4)	leshort		0x1f0	PowerPC
>>>(0x3c.l+4)	leshort		0x200	Intel Itanium
>>>(0x3c.l+4)	leshort		0x266	MIPS16
>>>(0x3c.l+4)	leshort		0x268	Motorola 68000
>>>(0x3c.l+4)	leshort		0x290	PA-RISC
>>>(0x3c.l+4)	leshort		0x366	MIPSIV
>>>(0x3c.l+4)	leshort		0x466	MIPS16 with FPU
>>>(0x3c.l+4)	leshort		0xebc	EFI byte code
>>>(0x3c.l+4)	leshort		0x8664	x86-64
>>>(0x3c.l+4)	leshort		0xc0ee	MSIL
>>>(0x3c.l+4)	default		x	Unknown processor type
>>>>&0		leshort		x	0x%x
# Two more bits of the flag word at header+22.
>>>(0x3c.l+22)	leshort&0x0200	>0	(stripped to external PDB)
>>>(0x3c.l+22)	leshort&0x1000	>0	system file
# Mono/.Net: a non-zero long at header+232 (PE32) or header+248 (PE32+).
# Presumably this is the CLR header data-directory entry -- verify against
# the PE specification.
>>>(0x3c.l+24)	leshort		0x010b
>>>>(0x3c.l+232) lelong	>0	Mono/.Net assembly
>>>(0x3c.l+24)	leshort		0x020b
>>>>(0x3c.l+248) lelong	>0	Mono/.Net assembly

# hooray, there's a DOS extender using the PE format, with a valid PE
# executable inside (which just prints a message and exits if run in win)
# (8.s*16): the short at offset 8 multiplied by 16 gives the position tested.
>>>(8.s*16)		string		32STUB	\b, 32rtm DOS extender
>>>(8.s*16)		string		!32STUB	\b, for MS Windows
# Header+0xf8 is where the following tests look for section names; a found
# name is then used to reach data that may hold an archive signature.
>>>(0x3c.l+0xf8)	string		UPX0 \b, UPX compressed
>>>(0x3c.l+0xf8)	search/0x140	PEC2 \b, PECompact2 compressed
>>>(0x3c.l+0xf8)	search/0x140	UPX2
>>>>(&0x10.l+(-4))	string		PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
>>>(0x3c.l+0xf8)	search/0x140	.idata
>>>>(&0xe.l+(-4))	string		PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
>>>>(&0xe.l+(-4))	string		ZZ0 \b, ZZip self-extracting archive
>>>>(&0xe.l+(-4))	string		ZZ1 \b, ZZip self-extracting archive
>>>(0x3c.l+0xf8)	search/0x140	.rsrc
>>>>(&0x0f.l+(-4))	string		a\\\4\5 \b, WinHKI self-extracting archive
>>>>(&0x0f.l+(-4))	string		Rar! \b, RAR self-extracting archive
>>>>(&0x0f.l+(-4))	search/0x3000	MSCF \b, InstallShield self-extracting archive
>>>>(&0x0f.l+(-4))	search/32	Nullsoft \b, Nullsoft Installer self-extracting archive
>>>(0x3c.l+0xf8)	search/0x140	.data
>>>>(&0x0f.l)		string		WEXTRACT \b, MS CAB-Installer self-extracting archive
>>>(0x3c.l+0xf8)	search/0x140	.petite\0 \b, Petite compressed
# The always-true byte test only re-anchors the relative offset used by the
# ACE check on the next line.
>>>>(0x3c.l+0xf7)	byte		x
>>>>>(&0x104.l+(-4))	string		=!sfx! \b, ACE self-extracting archive
>>>(0x3c.l+0xf8)	search/0x140	.WISE \b, WISE installer self-extracting archive
>>>(0x3c.l+0xf8)	search/0x140	.dz\0\0\0 \b, Dzip self-extracting archive
>>>&(0x3c.l+0xf8)	search/0x100	_winzip_ \b, ZIP self-extracting archive (WinZip)
>>>&(0x3c.l+0xf8)	search/0x100	SharedD \b, Microsoft Installer self-extracting archive
# Fixed offset inside the MZ stub.
>>>0x30			string		Inno \b, InnoSetup self-extracting archive

# Hmm, not a PE but the relocation table is too high for a traditional DOS exe,
# must be one of the unusual subformats.
# Anything whose extended header is not "PE\0\0" is reported as an MS-DOS
# executable; the sub-format tests below append the details.
>>(0x3c.l) string !PE\0\0 MS-DOS executable

# NE ("New Executable"): the byte at header+0x36 names the target OS.
>>(0x3c.l)		string		NE \b, NE
>>>(0x3c.l+0x36)	byte		1 for OS/2 1.x
>>>(0x3c.l+0x36)	byte		2 for MS Windows 3.x
>>>(0x3c.l+0x36)	byte		3 for MS-DOS
>>>(0x3c.l+0x36)	byte		4 for Windows 386
>>>(0x3c.l+0x36)	byte		5 for Borland Operating System Services
# The Phar Lap value must be tested before the "default" fallback: "default"
# fires when no earlier test at this level has matched, so with the old order
# a Phar Lap image was also reported as "(unknown OS 81)".
>>>(0x3c.l+0x36)	byte		0x81 for MS-DOS, Phar Lap DOS extender
>>>(0x3c.l+0x36)	default		x
>>>>(0x3c.l+0x36)	byte		x (unknown OS %x)
# Flag word at header+0x0c, masked with 0x8003: library vs. driver.
>>>(0x3c.l+0x0c)	leshort&0x8003	0x8002 (DLL)
>>>(0x3c.l+0x0c)	leshort&0x8003	0x8001 (driver)
>>>&(&0x24.s-1)		string		ARJSFX \b, ARJ self-extracting archive
>>>(0x3c.l+0x70)	search/0x80	WinZip(R)\ Self-Extractor \b, ZIP self-extracting archive (WinZip)

# LX executables: the extended header starts with "LX\0\0".
>>(0x3c.l)		string		LX\0\0 \b, LX
# Short at header+0x0a: target OS code.
>>>(0x3c.l+0x0a)	leshort		<1 (unknown OS)
>>>(0x3c.l+0x0a)	leshort		1 for OS/2
>>>(0x3c.l+0x0a)	leshort		2 for MS Windows
>>>(0x3c.l+0x0a)	leshort		3 for DOS
>>>(0x3c.l+0x0a)	leshort		>3 (unknown OS)
# Long at header+0x10: flag bits classifying the module type.
>>>(0x3c.l+0x10)	lelong&0x28000	=0x8000 (DLL)
>>>(0x3c.l+0x10)	lelong&0x20000	>0 (device driver)
>>>(0x3c.l+0x10)	lelong&0x300	0x300 (GUI)
>>>(0x3c.l+0x10)	lelong&0x28300	<0x300 (console)
# Short at header+0x08: CPU code.
>>>(0x3c.l+0x08)	leshort		1 i80286
>>>(0x3c.l+0x08)	leshort		2 i80386
>>>(0x3c.l+0x08)	leshort		3 i80486
# emx marker at (short at offset 8) * 16; the string following it is printed.
>>>(8.s*16)		string		emx \b, emx
>>>>&1			string		x %s
>>>&(&0x54.l-3)		string		arjsfx \b, ARJ self-extracting archive

# MS Windows system file, supposedly a collection of LE executables
# "W3" marker at the extended-header position.
>>(0x3c.l)		string		W3 \b, W3 for MS Windows

# LE executables: the extended header starts with "LE\0\0".
>>(0x3c.l)		string		LE\0\0 \b, LE executable
# OS code 1 prints nothing itself; its sub-tests look for DOS-extender banners
# at fixed positions in the file.
>>>(0x3c.l+0x0a)	leshort		1
# some DOS extenders use LE files with OS/2 header
>>>>0x240		search/0x100	DOS/4G for MS-DOS, DOS4GW DOS extender
>>>>0x240		search/0x200	WATCOM\ C/C++ for MS-DOS, DOS4GW DOS extender
>>>>0x440		search/0x100	CauseWay\ DOS\ Extender for MS-DOS, CauseWay DOS extender
>>>>0x40		search/0x40	PMODE/W for MS-DOS, PMODE/W DOS extender
>>>>0x40		search/0x40	STUB/32A for MS-DOS, DOS/32A DOS extender (stub)
>>>>0x40		search/0x80	STUB/32C for MS-DOS, DOS/32A DOS extender (configurable stub)
>>>>0x40		search/0x80	DOS/32A for MS-DOS, DOS/32A DOS extender (embedded)
# this is a wild guess; hopefully it is a specific signature
>>>>&0x24		lelong		<0x50
>>>>>(&0x4c.l)		string		\xfc\xb8WATCOM
>>>>>>&0		search/8	3\xdbf\xb9 \b, 32Lite compressed
# another wild guess: if real OS/2 LE executables exist, they probably have higher start EIP
#>>>>(0x3c.l+0x1c)	lelong		>0x10000 for OS/2
# fails with DOS-Extenders.
# Remaining OS codes at header+0x0a.
>>>(0x3c.l+0x0a)	leshort		2 for MS Windows
>>>(0x3c.l+0x0a)	leshort		3 for DOS
>>>(0x3c.l+0x0a)	leshort		4 for MS Windows (VxD)
>>>(&0x7c.l+0x26)	string		UPX \b, UPX compressed
>>>&(&0x54.l-3)		string		UNACE \b, ACE self-extracting archive

# looks like ASCII, probably some embedded copyright message.
# and definitely not NE/LE/LX/PE
# A value above 0x20000000 at 0x3c is treated as text rather than a header
# offset; the image is reported as plain MZ unless a COFF magic (0x014c)
# is found at (short at offset 4) * 512.
>>0x3c		lelong	>0x20000000
>>>(4.s*512)	leshort !0x014c \b, MZ for MS-DOS
# header data too small for extended executable
# The ">2 long !0" line starts a new level-1 branch.
>2		long	!0
>>0x18		leshort <0x40
>>>(4.s*512)	leshort !0x014c

# Inspect the two bytes at the computed position for an embedded "LE" or "BW"
# signature; if neither is present the file is a plain MZ program.
>>>>&(2.s-514)	string	!LE
>>>>>&-2	string	!BW \b, MZ for MS-DOS
>>>>&(2.s-514)	string	LE \b, LE
>>>>>0x240	search/0x100	DOS/4G for MS-DOS, DOS4GW DOS extender
# educated guess since indirection is still not capable enough for complex offset
# calculations (next embedded executable would be at &(&2*512+&0-2)
# I suspect there are only LE executables in these multi-exe files
>>>>&(2.s-514)	string	BW
>>>>>0x240	search/0x100	DOS/4G ,\b LE for MS-DOS, DOS4GW DOS extender (embedded)
>>>>>0x240	search/0x100	!DOS/4G ,\b BW collection for MS-DOS

# This sequence skips to the first COFF segment, usually .text
# COFF magic 0x014c at (short at offset 4) * 512.
>(4.s*512)	leshort		0x014c \b, COFF
# Stub identification strings at (short at offset 8) * 16.
>>(8.s*16)	string		go32stub for MS-DOS, DJGPP go32 DOS extender
>>(8.s*16)	string		emx
>>>&1		string		x for DOS, Win or OS/2, emx %s
# The always-true byte test only positions the offset for the UPX check.
>>&(&0x42.l-3)	byte		x 
>>>&0x26	string		UPX \b, UPX compressed
# and yet another guess: small .text, and after large .data is unusual, could be 32lite
>>&0x2c		search/0xa0	.text
>>>&0x0b	lelong		<0x2000
>>>>&0		lelong		>0x6000 \b, 32lite compressed

# WDos/X marker at (short at offset 8) * 16.
>(8.s*16) string $WdX \b, WDos/X DOS extender

# By now an executable type should have been printed out.  The executable
# may be a self-uncompressing archive, so look for evidence of that and 
# print it out.  
#
# Some signatures below from Greg Roelofs, newt@uchicago.edu.
#
# Packer / self-extractor signatures found at fixed offsets inside the MZ
# header or stub.  Each match appends its description to what was printed
# before.
>0x35	string	\x8e\xc0\xb9\x08\x00\xf3\xa5\x4a\x75\xeb\x8e\xc3\x8e\xd8\x33\xff\xbe\x30\x00\x05 \b, aPack compressed
>0xe7	string	LH/2\ 	Self-Extract \b, %s
# Four-byte tags stored at offset 0x1c.
>0x1c	string	UC2X	\b, UCEXE compressed
>0x1c	string	WWP\ 	\b, WWPACK compressed
>0x1c	string	RJSX 	\b, ARJ self-extracting archive
>0x1c	string	diet 	\b, diet compressed
>0x1c	string	LZ09 	\b, LZEXE v0.90 compressed
>0x1c	string	LZ91 	\b, LZEXE v0.91 compressed
>0x1c	string	tz 	\b, TinyProg compressed
# PKWARE banners at offset 0x1e.
>0x1e	string	Copyright\ 1989-1990\ PKWARE\ Inc.	Self-extracting PKZIP archive
!:mime	application/zip
# Yes, this really is "Copr", not "Corp."
>0x1e	string	PKLITE\ Copr.	Self-extracting PKZIP archive
!:mime	application/zip
# winarj stores a message in the stub instead of the sig in the MZ header
>0x20	search/0xe0	aRJsfX \b, ARJ self-extracting archive
# AIN: the character after "AIN" selects the reported version; only "2" is
# reported as 2.x, anything comparing lower or higher is reported as 1.x.
>0x20	string AIN
>>0x23	string 2	\b, AIN 2.x compressed
>>0x23	string <2	\b, AIN 1.x compressed
>>0x23	string >2	\b, AIN 1.x compressed
# LHa banner in either capitalisation.
>0x24	string	LHa's\ SFX \b, LHa self-extracting archive
!:mime	application/x-lha
>0x24	string	LHA's\ SFX \b, LHa self-extracting archive
!:mime	application/x-lha
>0x24	string	\ $ARX \b, ARX self-extracting archive
>0x24	string	\ $LHarc \b, LHarc self-extracting archive
>0x20	string	SFX\ by\ LARC \b, LARC self-extracting archive
>0x40	string aPKG \b, aPackage self-extracting archive
>0x64	string	W\ Collis\0\0 \b, Compack compressed
>0x7a	string		Windows\ self-extracting\ ZIP	\b, ZIP self-extracting archive
# After the banner, search for a 4-byte marker and follow the long stored
# there to look for a cabinet ("MSCF") signature.
>>&0xf4 search/0x140 \x0\x40\x1\x0
>>>(&0.l+(4)) string MSCF \b, WinHKI CAB self-extracting archive
# Signatures at fixed absolute file offsets.
>1638	string	-lh5- \b, LHa self-extracting archive v2.13S
>0x17888 string Rar! \b, RAR self-extracting archive

# Skip to the end of the EXE.  This will usually work fine in the PE case
# because the MZ image is hardcoded into the toolchain and almost certainly
# won't match any of these signatures.
# The two always-true tests compute the position just past the MZ image from
# the shorts at offsets 4 and 2; the level-3 tests then look for an archive
# signature appended there.
>(4.s*512)	long	x 
>>&(2.s-517)	byte	x 
>>>&0	string		PK\3\4 \b, ZIP self-extracting archive
>>>&0	string		Rar! \b, RAR self-extracting archive
>>>&0	string		=!\x11 \b, AIN 2.x self-extracting archive
>>>&0	string		=!\x12 \b, AIN 2.x self-extracting archive
>>>&0	string		=!\x17 \b, AIN 1.x self-extracting archive
>>>&0	string		=!\x18 \b, AIN 1.x self-extracting archive
>>>&7	search/400	**ACE** \b, ACE self-extracting archive
>>>&0	search/0x480	UC2SFX\ Header \b, UC2 self-extracting archive

# a few unknown ZIP sfxes, no idea if they are needed or if they are
# already captured by the generic patterns above
>(8.s*16)	search/0x20	PKSFX \b, ZIP self-extracting archive (PKZIP)
# TODO: how to add this? >FileSize-34 string Windows\ Self-Installing\ Executable \b, ZIP self-extracting archive
#

# TELVOX Teleinformatica CODEC self-extractor for OS/2:
# The short at offset 49824 is printed as the number of files.
>49801	string	\x79\xff\x80\xff\x76\xff	\b, CODEC archive v3.21
>>49824 leshort		=1			\b, 1 file
>>49824 leshort		>1			\b, %u files

# Popular applications
# Word 6.0: the descriptive string stored at offset 2080 is printed as-is (%s).
2080	string	Microsoft\ Word\ 6.0\ Document	%s
!:mime	application/msword
# Spanish-language variant of the same string.
2080	string	Documento\ Microsoft\ Word\ 6 Spanish Microsoft Word 6 document data
!:mime	application/msword
# Pawel Wiecek <coven@i17linuxb.ists.pwr.wroc.pl> (for polish Word)
2112	string	MSWordDoc			Microsoft Word document data
!:mime	application/msword
#
# Big-endian long signature at offset 0.
0	belong	0x31be0000			Microsoft Word Document
!:mime	application/msword
#
0	string/b	PO^Q`				Microsoft Word 6.0 Document
!:mime	application/msword
#
# Binary signatures written in octal escapes.
0	string/b	\376\067\0\043			Microsoft Office Document
!:mime	application/msword
0	string/b	\333\245-\0\0\0			Microsoft Office Document
!:mime	application/msword
512	string/b	\354\245\301			Microsoft Word Document
!:mime	application/msword

#
# WinWord 2.0 signature.  This rule was previously listed twice with
# identical offset, pattern, description and MIME type; it is kept once.
0	string/b	\xDB\xA5\x2D\x00		Microsoft WinWord 2.0 Document
!:mime application/msword
#
# Excel 5.0: the descriptive string stored at offset 2080 is printed as-is (%s).
2080	string	Microsoft\ Excel\ 5.0\ Worksheet	%s
!:mime	application/vnd.ms-excel

# Italian-language Excel string at offset 2080, printed as-is (%s).
2080	string	Foglio\ di\ lavoro\ Microsoft\ Exce	%s
!:mime	application/vnd.ms-excel
#
# Pawel Wiecek <coven@i17linuxb.ists.pwr.wroc.pl> (for polish Excel)
2114	string	Biff5		Microsoft Excel 5.0 Worksheet
!:mime	application/vnd.ms-excel
# Italian MS-Excel
2121	string	Biff5		Microsoft Excel 5.0 Worksheet
!:mime	application/vnd.ms-excel
# Eight-byte binary signature at the start of the file.
0	string/b	\x09\x04\x06\x00\x00\x00\x10\x00	Microsoft Excel Worksheet
!:mime	application/vnd.ms-excel
#
# Lotus 1-2-3: two different big-endian long signatures at offset 0.
0	belong	0x00001a00	Lotus 1-2-3
!:mime	application/x-123
#
0	belong	0x00000200	Lotus 1-2-3
!:mime	application/x-123
# Lotus WordPro: "WordPro" followed by either NUL or CR + 0xFB (octal 373).
0	string/b		WordPro\0	Lotus WordPro
!:mime	application/vnd.lotus-wordpro
0	string/b		WordPro\r\373	Lotus WordPro
!:mime	application/vnd.lotus-wordpro

# Windows icons (Ian Springer <ips@fpk.hp.com>)
# Four-byte signature 00 00 01 00 at the start of the file.
0	string/b	\000\000\001\000	MS Windows icon resource
!:mime	image/x-icon

# .PIF files added by Joerg Jenderek from http://smsoft.ru/en/pifdoc.htm
# only for windows versions equal or greater 3.0
# The NUL-terminated marker string is expected at offset 0x171.
0x171	string	MICROSOFT\ PIFEX\0	Windows Program Information File
!:mime	application/x-dosexec

# TNEF magic From "Joomy" <joomy@se-ed.net> 
# Microsoft Outlook's Transport Neutral Encapsulation Format (TNEF)
# The TNEF signature 0x223e9f78 is a 32-bit little-endian value, so it must be
# tested with lelong.  A 16-bit leshort cannot hold the constant, so the full
# four-byte signature was never actually compared.
0	lelong		0x223e9f78	TNEF
!:mime	application/vnd.ms-tnef

#------------------------------------------------------------------------------
# From Stuart Caie <kyzer@4u.net> (developer of cabextract)
# Microsoft Cabinet files
# "MSCF" followed by four NUL bytes at the start of the file.
0	string/b	MSCF\0\0\0\0	Microsoft Cabinet archive data
!:mime application/vnd.ms-cab-compressed

# from http://filext.com by Derek M Jones <derek@knosof.co.uk>
# False positive with PPT (also currently this string is too long)
#0	string/b	\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06	Microsoft Installer
# Eight-byte signature in octal escapes; these are the same first eight bytes
# (D0 CF 11 E0 A1 B1 1A E1) as the disabled rule above.
0	string/b	\320\317\021\340\241\261\032\341	Microsoft Office Document
!:mime	application/msword
# The sub-tests below are disabled.
#>48	byte	0x1B					Excel Document
#!:mime application/vnd.ms-excel
#>546	string	bjbj			Microsoft Word Document
#!:mime	application/msword
#>546	string	jbjb			Microsoft Word Document
#!:mime	application/msword

0	string/b	\224\246\056		Microsoft Word Document
!:mime	application/msword

# "Root Entry" with a NUL after every character but the last, at offset 512.
512	string	R\0o\0o\0t\0\ \0E\0n\0t\0r\0y	Microsoft Word Document
!:mime	application/msword

# MS eBook format (.lit)
# "ITOLITLS" signature at offset 0; the little-endian long at offset 8 is
# printed as the version number.
0	string/b	ITOLITLS		Microsoft Reader eBook Data
>8	lelong	x			\b, version %u
!:mime					application/x-ms-reader
