RCS_ID("$Id: FFArchivePDF.m 517 2005-10-15 16:49:49Z ravemax $")

#import "FFArchivePDF.h"
#import <QuickTime/ImageCompression.h>
#import <QuickTime/Movies.h>
#import <QuickTime/QuickTimeComponents.h>

#import <string.h>

#import <QTKit/QTDataReference.h>


// Plain holder that pairs the stream of a PDF image with the stream's dictionary.
// Both references are stored exactly as passed in - nothing is retained or released here.
@implementation FFPDFImage

// Remembers the stream and its dictionary in the new instance.
// Returns nil if the superclass initializer fails.
- (id)initWithStream:(CGPDFStreamRef)stream
	   andDictionary:(CGPDFDictionaryRef)dict {

	if ((self = [super init]) == nil)
		return nil;

	m_dict		= dict;
	m_stream	= stream;

	return self;
}

// The stream passed to the initializer
- (CGPDFStreamRef)stream {
	return m_stream;
}

// The dictionary passed to the initializer
- (CGPDFDictionaryRef)dict {
	return m_dict;
}

@end

#pragma mark -


@implementation FFArchivePDF

#pragma mark PDF Colorspaces

// Expands packed 1-bit-per-pixel data to one byte per pixel: bit not set = 0, else 255.
//   data   - in/out: the packed source data; released and replaced by the expanded data
//   width  - pixels per row (every source row starts with a new byte)
//   height - number of rows
// The resulting data always has a length of width * height bytes; pixels for which the
// source holds no bits stay 0. If the size is not positive or the new data can't be
// created, *data is left untouched.
// Known issue: "Decode" dictionary (swaps 0 <-> 1)
static void createMonochromeImage(CFDataRef* data, long width, long height) {	
	CFMutableDataRef	newData;
	const UInt8*		dataPtr, *dataEndPtr;
	UInt8*				newDataPtr, *newDataEndPtr, *lineBreak;
	int					bit;
	
	if ((width <= 0) || (height <= 0))
		return;
	
	// A capacity only reserves room - the length must be set explicitly or
	// the data would report a length of 0 to everyone using it afterwards
	newData = CFDataCreateMutable(NULL, 0);
	if (newData == NULL)
		return;
	CFDataSetLength(newData, width * height);

	dataPtr			= CFDataGetBytePtr(*data);
	dataEndPtr		= dataPtr + CFDataGetLength(*data);
	newDataPtr		= CFDataGetMutableBytePtr(newData);
	newDataEndPtr	= newDataPtr + (width * height);
	lineBreak		= newDataPtr + width;
	
	// Stop at whichever ends first: the source bytes or the room for height rows
	while ((dataPtr < dataEndPtr) && (newDataPtr < newDataEndPtr)) {
		for (bit = 7; bit >= 0; bit--) { // highest bit first
			*newDataPtr = ((*dataPtr >> bit) & 1) ? 255 : 0;
			newDataPtr++;
			
			// The remaining bits of the byte only pad the row - skip them
			if (newDataPtr == lineBreak) {
				lineBreak += width;
				break;
			}
		}
		dataPtr++;
	}
	
	CFRelease(*data);
	
	*data = newData;
}

// Creates the CG colorspace for one of the PDF names "DeviceRGB", "DeviceCMYK" or
// "DeviceGray". A gray image with 1 bit per component is expanded to 8 bits first:
// *data is replaced via createMonochromeImage and *bpc becomes 8.
// Returns the newly created colorspace or NULL for every other name.
static CGColorSpaceRef colorSpaceFromName(const char* name, CFDataRef* data,
										  long* bpc, long width, long height) {
	CGColorSpaceRef cs = NULL;

	FFLOG(8, @"-- colorspace (name) = %s, %ldx%ld", name, width, height);

	if (strcmp(name, "DeviceRGB") == 0)
		cs = CGColorSpaceCreateDeviceRGB();
	else if (strcmp(name, "DeviceCMYK") == 0)
		cs = CGColorSpaceCreateDeviceCMYK();
	else if (strcmp(name, "DeviceGray") == 0) {
		// Monochrome data has to be unpacked before it can be used as gray
		if (*bpc == 1) {
			createMonochromeImage(data, width, height);
			*bpc = 8;
		}
		cs = CGColorSpaceCreateDeviceGray();
	}

	return cs;
}

static CGColorSpaceRef parseColorSpaceArray(CGPDFArrayRef csarray, CFDataRef* data, 
											long* bpc, long width, long height); // forward

// Handles an "Indexed" colorspace array: entry 1 = base colorspace (name or array),
// entry 2 = highest valid index, entry 3 = color table (stream or string).
// No indexed colorspace is created (CGColorSpaceCreateIndexed was tried and only gave
// grayscale). Instead every index byte of *data is replaced by the color it refers to,
// so the image can be used as a plain image in the base colorspace.
//   csarray            - the colorspace array
//   data               - in/out: released and replaced by the expanded data on success
//   bpc, width, height - only passed on to the parsing of the base colorspace
// Returns the base colorspace, or NULL if the base colorspace is unsupported or the
// color table / highest index is missing. Parsing the base colorspace may already have
// replaced *data even if NULL is returned.
// Known limitation: every byte of *data is treated as one index.
static CGColorSpaceRef parseColorSpaceIndexed(CGPDFArrayRef csarray, CFDataRef* data,
											  long* bpc, long width, long height) {
	const char*			baseName;
	CGPDFArrayRef		baseArray;
	CGColorSpaceRef		baseCS		= NULL;
	CGPDFInteger		lastIndex	= 0;
	CGPDFStreamRef		colTabStream;
	CGPDFStringRef		colTabString;
	CGPDFDataFormat		unused;
	CFDataRef			colTabData	= NULL;
	const UInt8*		colTabPtr	= NULL;
	size_t				colTabLen	= 0;
	size_t				numComp, numEntries;
	CFIndex				numPixels;
	CFMutableDataRef	newData		= NULL;
	const UInt8*		dataPtr, *dataEndPtr;
	UInt8*				newDataPtr;
	
	// Get the base colorspace
	if (CGPDFArrayGetName(csarray, 1, &baseName))
		baseCS = colorSpaceFromName(baseName, data, bpc, width, height);
	else if (CGPDFArrayGetArray(csarray, 1, &baseArray))
		baseCS = parseColorSpaceArray(baseArray, data, bpc, width, height);
	
	if (baseCS == NULL)
		return NULL;
	
	numComp = CGColorSpaceGetNumberOfComponents(baseCS);
	
	// Locate the color table - either a stream or a string
	if (CGPDFArrayGetStream(csarray, 3, &colTabStream)) {
		colTabData = CGPDFStreamCopyData(colTabStream, &unused);
		if (colTabData != NULL) {
			colTabPtr = CFDataGetBytePtr(colTabData);
			colTabLen = (size_t)CFDataGetLength(colTabData);
		}
	} else if (CGPDFArrayGetString(csarray, 3, &colTabString)) {
		colTabPtr = (const UInt8*)CGPDFStringGetBytePtr(colTabString);
		colTabLen = CGPDFStringGetLength(colTabString);
	}
	
	// Replace the indexes by the colors
	if ((colTabPtr != NULL) && (numComp > 0) &&
		CGPDFArrayGetInteger(csarray, 2, &lastIndex) && (lastIndex >= 0)) {
		
		// Never trust the highest index more than the real size of the table
		numEntries = colTabLen / numComp;
		if ((numEntries > 0) && ((size_t)lastIndex >= numEntries))
			lastIndex = (CGPDFInteger)(numEntries - 1);
		
		if (numEntries > 0) {
			numPixels	= CFDataGetLength(*data);
			newData		= CFDataCreateMutable(NULL, 0);
		}
		
		if (newData != NULL) {
			// The length has to be set - a capacity alone leaves the length at 0
			CFDataSetLength(newData, (CFIndex)((size_t)numPixels * numComp));
			
			dataPtr		= CFDataGetBytePtr(*data);
			dataEndPtr	= dataPtr + numPixels;
			newDataPtr	= CFDataGetMutableBytePtr(newData);
			
			while (dataPtr < dataEndPtr) {
				size_t idx = *dataPtr;
				
				// Indexes beyond the table use the last entry instead of foreign memory
				if (idx > (size_t)lastIndex)
					idx = (size_t)lastIndex;
				memcpy(newDataPtr, &(colTabPtr[idx * numComp]), numComp);
				
				dataPtr++;
				newDataPtr += numComp;
			}
		}
	}
	
	if (colTabData != NULL)
		CFRelease(colTabData);
	
	// No table, no highest index or no memory
	if (newData == NULL) {
		CGColorSpaceRelease(baseCS);
		return NULL;
	}
	
	CFRelease(*data);
	*data = newData;
	
	return baseCS;
}

// Handles an "ICCBased" colorspace array: entry 1 = stream with the ICC profile.
// The profile itself is not used. The colorspace is taken from the "Alternate" name of
// the stream's dictionary if colorSpaceFromName knows it, else from the number of
// components "N" (1 = gray, 3 = RGB, 4 = CMYK). 1-bit gray data is expanded to 8 bits,
// replacing *data and setting *bpc to 8.
// Returns the new colorspace, or NULL if the stream is missing or neither "Alternate"
// nor "N" leads to a colorspace.
static CGColorSpaceRef parseColorSpaceICC(CGPDFArrayRef csarray, CFDataRef* data,
										  long* bpc, long width, long height) {
	CGPDFStreamRef		iccStream;
	CGPDFDictionaryRef	iccDict;
	const char*			altName;
	CGPDFInteger		n;
	CGColorSpaceRef		newCS = NULL;
	
	// Without the stream there is no dictionary to look at
	if (!CGPDFArrayGetStream(csarray, 1, &iccStream))
		return NULL;
	
	iccDict = CGPDFStreamGetDictionary(iccStream);
	if (iccDict == NULL)
		return NULL;

	// Trying to read and use the alternate name
	if (CGPDFDictionaryGetName(iccDict, "Alternate", &altName))
		newCS = colorSpaceFromName(altName, data, bpc, width, height);
	
	// Use the number of components to determine the correct CS
	if ((newCS == NULL) && CGPDFDictionaryGetInteger(iccDict, "N", &n)) {
		switch (n) {
			case 1 : if (*bpc == 1) {
						createMonochromeImage(data, width, height);
						*bpc = 8;
					 }
					 newCS = CGColorSpaceCreateDeviceGray(); break;
			case 3 : newCS = CGColorSpaceCreateDeviceRGB(); break;
			case 4 : newCS = CGColorSpaceCreateDeviceCMYK(); break;
			default: break; // unknown number of components - unsupported
		}
	}
	
	// TODO: CGColorSpaceCreateICCBased could use the profile itself (range?)
	return newCS;
}

// Stores the first n entries of the PDF array as floats in dst.
// An entry is read as number first, then as integer; if both fail 0 is stored.
static void pdfArrayToFloat(CGPDFArrayRef pa, float dst[], int n) {
	CGPDFReal		realValue;
	CGPDFInteger	intValue;
	int				pos = 0;

	while (pos < n) {
		float value = 0.0f;

		if (CGPDFArrayGetNumber(pa, pos, &realValue))
			value = realValue;
		else if (CGPDFArrayGetInteger(pa, pos, &intValue))
			value = (float)intValue;

		dst[pos] = value;
		pos++;
	}
}

// Handles a "Lab" colorspace array: entry 1 = dictionary with "WhitePoint" (required),
// "BlackPoint" and "Range" (both optional).
// A missing black point defaults to 0 0 0, a missing range to the default of the
// PDF reference: -100 100 -100 100.
// Returns the new colorspace, or NULL if the dictionary or the white point is missing.
static CGColorSpaceRef parseColorSpaceLab(CGPDFArrayRef csarray) {
	CGPDFDictionaryRef	labDict;
	CGPDFArrayRef		whitePDFArray, blackPDFArray, rangePDFArray;
	float				whiteFloat[3], blackFloat[3], rangeFloat[4];
	
	if (!CGPDFArrayGetDictionary(csarray, 1, &labDict))
		return NULL;
	
	// White Point - required, there is no meaningful default
	if (!CGPDFDictionaryGetArray(labDict, "WhitePoint", &whitePDFArray))
		return NULL;
	pdfArrayToFloat(whitePDFArray, whiteFloat, 3);
	
	// Black point - optional
	if (CGPDFDictionaryGetArray(labDict, "BlackPoint", &blackPDFArray))
		pdfArrayToFloat(blackPDFArray, blackFloat, 3);
	else
		bzero(blackFloat, sizeof(float) * 3);
	
	// Range - optional; amin amax bmin bmax
	if (CGPDFDictionaryGetArray(labDict, "Range", &rangePDFArray))
		pdfArrayToFloat(rangePDFArray, rangeFloat, 4);
	else {
		rangeFloat[0] = rangeFloat[2] = -100.0f;
		rangeFloat[1] = rangeFloat[3] = 100.0f;
	}
	
	return CGColorSpaceCreateLab(whiteFloat, blackFloat, rangeFloat);
}

// Dispatches a colorspace array to the matching parser. The first entry of the array
// names the kind of colorspace; "Indexed", "ICCBased" and "Lab" are handled.
// Returns the colorspace created by the parser or NULL for everything else.
static CGColorSpaceRef parseColorSpaceArray(CGPDFArrayRef csarray, CFDataRef* data,
											long* bpc, long width, long height) {
	const char* family;

	// Nothing to dispatch on if the first entry is no name
	if (!CGPDFArrayGetName(csarray, 0, &family))
		return NULL;

	FFLOG(8, @"colorspace (array): %s - %ldx%ld", family, width, height);

	if (strcmp(family, "Indexed") == 0)
		return parseColorSpaceIndexed(csarray, data, bpc, width, height);
	if (strcmp(family, "ICCBased") == 0)
		return parseColorSpaceICC(csarray, data, bpc, width, height);
	if (strcmp(family, "Lab") == 0)
		return parseColorSpaceLab(csarray);

	return NULL;
}

#pragma mark -
#pragma mark CGImage to TIFF

// Writes a CGImage as uncompressed TIFF (Exif disabled) to the POSIX path dstpath,
// using the QuickTime graphics exporter.
// Returns TRUE only if the data reference and the exporter could be created and the
// export itself reported noErr.
// See http://developer.apple.com/technotes/tn2005/tn2140.html
BOOL exportCGImageToTIFFile(CGImageRef img, CFStringRef dstpath) {
	GraphicsExportComponent exporter	= 0;
	Handle					pathRef		= NULL;
	OSType					pathRefType;
	unsigned long			bytesWritten;
	ComponentResult			err;

	// The destination is handed to QuickTime as data reference
	err = QTNewDataReferenceFromFullPathCFString(dstpath, kCFURLPOSIXPathStyle,
												 0, &pathRef, &pathRefType);
	if ((err != noErr) || (pathRef == NULL))
		return FALSE;

	// No exporter - only the data reference has to be cleaned up
	err = OpenADefaultComponent(GraphicsExporterComponentType, kQTFileTypeTIFF, &exporter);
	if (err != noErr) {
		DisposeHandle(pathRef);
		return FALSE;
	}

	// Configure and run the export
	GraphicsExportSetInputCGImage(exporter, img);
	GraphicsExportSetOutputDataReference(exporter, pathRef, pathRefType);
	GraphicsExportSetCompressionMethod(exporter, kQTTIFFCompression_None);
	GraphicsExportSetExifEnabled(exporter, FALSE);

	err = GraphicsExportDoExport(exporter, &bytesWritten);

	CloseComponent(exporter);
	DisposeHandle(pathRef);

	return (err == noErr);
}

#pragma mark -
#pragma mark PDF handling

// Context handed to handleXObjects through the "info" pointer of
// CGPDFDictionaryApplyFunction (filled in by _openAndParsePDF).
typedef struct {
	// The XObject dictionary that is currently enumerated - the callback looks up
	// the stream of each key in here
	CGPDFDictionaryRef		xobjsDict;
	// Number of the current page, starting with 1 - becomes the prefix of the filenames
	int						pageNo;
	// Receives one dictionary (name, sizes, date) per image found
	NSMutableArray*			files;
	// Receives the FFPDFImage of each image, the filename is the key
	NSMutableDictionary*	images;
} HandleXObjectsArgs;

// Callback for CGPDFDictionaryApplyFunction - called once per entry of an XObject
// dictionary. If the entry is an image (and no image mask) a filename is made up for it
// and the image is registered in the "files" array and the "images" dictionary.
//   key   - key of the entry inside the XObject dictionary
//   value - not used; the stream is looked up again by its key
//   info  - pointer to the HandleXObjectsArgs of the current page
// The filename is "<page>-<name>.<ext>". The name is the "ImageName"/"Name" string of
// the image or else the key; the extension depends on the filter: DCTDecode = jpg,
// JPXDecode = jp2, everything else (including no filter) = tif.
static void handleXObjects(const char* key,	CGPDFObjectRef value, void* info) {
	HandleXObjectsArgs*	hxargs;
	CGPDFStreamRef		kstream; // how to determine the "value" type?
	CGPDFDictionaryRef	sdict;
	const char*			subtypeName, *filter;
	CGPDFArrayRef		filterArray;
	size_t				filterCount;
	CGPDFBoolean		isMask;
	CFStringRef			imgName;
	CGPDFStringRef		name;
	FFPDFImage*			img;
	NSString*			ext, *filename;
	CGPDFInteger		len;
	NSNumber*			fsize;
	
	// Better to read
	hxargs = (HandleXObjectsArgs*)info;
	
	// Invalid XObject - no stream?
	if (!CGPDFDictionaryGetStream(hxargs->xobjsDict,  key, &kstream))
		return;
	
	// An image?
	sdict = CGPDFStreamGetDictionary(kstream);
	if (!CGPDFDictionaryGetName(sdict, "Subtype", &subtypeName) ||
		strcmp(subtypeName, "Image"))
		return;
	
	// Not an image mask?
	if (CGPDFDictionaryGetBoolean(sdict, "ImageMask", &isMask) && isMask)
		return;

	// Get the name - if available
	if (CGPDFDictionaryGetString(sdict, "ImageName", &name))
		imgName = CGPDFStringCopyTextString(name);
	else if (CGPDFDictionaryGetString(sdict, "Name", &name))
		imgName = CGPDFStringCopyTextString(name);
	else
		imgName = NULL;

	// Get the filter. It is optional and either a single name or an array of names.
	// In an array the last filter is the one closest to the image data.
	filter = NULL;
	if (!CGPDFDictionaryGetName(sdict, "Filter", &filter)) {
		filter = NULL;
		if (CGPDFDictionaryGetArray(sdict, "Filter", &filterArray)) {
			filterCount = CGPDFArrayGetCount(filterArray);
			if ((filterCount == 0) ||
				!CGPDFArrayGetName(filterArray, filterCount - 1, &filter))
				filter = NULL;
		}
	}
	
	// Get the extension
	if (filter == NULL)
		ext = @"tif";
	else if (!strcmp(filter, "DCTDecode"))
		ext = @"jpg";
	else if (!strcmp(filter, "JPXDecode")) // JPEG2000
		ext = @"jp2";
	else
		ext = @"tif";
	
	// Create the filename
	if (imgName != NULL) {
		if ([(NSString*)imgName hasSuffix:ext])
			filename = [[NSString alloc] initWithFormat:@"%d-%@", 
						hxargs->pageNo, imgName];
		else
			filename = [[NSString alloc] initWithFormat:@"%d-%@.%@",
						hxargs->pageNo, imgName, ext];

		CFRelease(imgName);
	} else
		filename = [[NSString alloc] initWithFormat:@"%d-%s.%@",
					hxargs->pageNo, key, ext];

	// Get the length
	if (CGPDFDictionaryGetInteger(sdict, "Length", &len))
		fsize	= [NSNumber numberWithLong:len];
	else
		fsize	= [NSNumber numberWithInt:0];
	
	// Create the new image entry
	img	= [[FFPDFImage alloc] initWithStream:kstream andDictionary:sdict];
	[hxargs->files addObject:[NSDictionary dictionaryWithObjectsAndKeys:
		filename, FFFilename,
		fsize, FFCompressedFileSize,
		fsize, FFUncompressedFileSize,
		[NSDate date], FFFileCreationDate, // CGPDFDocumentGetInfo available 10.4+
		nil]];
	[hxargs->images setObject:img forKey:filename];
	[filename release]; // Both are retained by the dictionary
	[img release];
}

// Opens the PDF at m_filePath (kept in m_document) and walks through all pages.
// Every entry of a page's "Resources" -> "XObject" dictionary is handed to
// handleXObjects, which fills m_files and m_images.
// Returns FALSE only if the document could not be opened.
- (BOOL)_openAndParsePDF {
	HandleXObjectsArgs	args;
	CGPDFDictionaryRef	pageDict, resources;
	CGPDFPageRef		page;
	size_t				pageCount, pageIdx;
	NSURL*				url;

	// Open the PDF
	url			= [NSURL fileURLWithPath:m_filePath];
	m_document	= CGPDFDocumentCreateWithURL((CFURLRef)url);
	if (m_document == NULL) {
		FFLOG(8, @"Failed to open the PDF");
		return FALSE;
	}

	// The destinations stay the same for all pages
	args.images	= m_images;
	args.files	= m_files;

	pageCount = CGPDFDocumentGetNumberOfPages(m_document);
	for (pageIdx = 0; pageIdx != pageCount; ++pageIdx) {
		// Page numbers start with 1
		args.pageNo	= pageIdx + 1;
		page		= CGPDFDocumentGetPage(m_document, args.pageNo);
		pageDict	= CGPDFPageGetDictionary(page);

		// Only pages with a non-empty XObject dictionary (forms, images..) in their
		// resources are of interest
		if ((pageDict != NULL) &&
			CGPDFDictionaryGetDictionary(pageDict, "Resources", &resources) &&
			CGPDFDictionaryGetDictionary(resources, "XObject", &(args.xobjsDict)) &&
			(CGPDFDictionaryGetCount(args.xobjsDict) > 0))
			CGPDFDictionaryApplyFunction(args.xobjsDict, handleXObjects, (void*)&args);
	}

	return TRUE;
}

// Writes the data of a PDF image to the given path (w/o extension).
// Raw data is turned into a CGImage - using the "BitsPerComponent", "Width", "Height"
// and "ColorSpace" entries of the image dictionary - and exported as TIFF. Data that is
// still encoded (JPEG or JPEG2000) is written to the file as it is.
// Returns FALSE if the stream data can't be copied, an image attribute is missing or
// not positive, the colorspace is not supported or writing fails.
- (BOOL)_writeImage:(FFPDFImage*)img toPath:(NSString*)path {
	CGPDFDataFormat	dformat;
	CFDataRef		data;
	BOOL			ret = FALSE;
	
	// No data, nothing to write (and nothing to release)
	data = CGPDFStreamCopyData([img stream], &dformat);
	if (data == NULL)
		return FALSE;
	
	// Raw
	if (dformat == CGPDFDataFormatRaw) {
		const char* const CSID = "ColorSpace";
		
		CGPDFDictionaryRef	sdict;
		CGPDFInteger		wd = 0, ht = 0, bpc = 0;
		const char*			nameCS;
		CGPDFArrayRef		csarray;
		CGColorSpaceRef		cs = NULL;

		sdict = [img dict];
			
		// Image attributes - all of them are needed
		if (CGPDFDictionaryGetInteger(sdict, "BitsPerComponent", &bpc) &&
			CGPDFDictionaryGetInteger(sdict, "Width", &wd) &&
			CGPDFDictionaryGetInteger(sdict, "Height", &ht) &&
			(bpc > 0) && (wd > 0) && (ht > 0)) {
	
			// Colorspace - the parsers may replace "data" and change "bpc"
			if (CGPDFDictionaryGetName(sdict, CSID, &nameCS)) // Simple
				cs = colorSpaceFromName(nameCS, &data, &bpc, wd, ht);
			else if (CGPDFDictionaryGetArray(sdict, CSID, &csarray))
				cs = parseColorSpaceArray(csarray, &data, &bpc, wd, ht);
		}
		
		// Create the image and write it
		if (cs != NULL) {
			size_t				comp, bytesPerRow;
			CGDataProviderRef	dprov;
			CGImageRef			newImg;
			
			comp		= CGColorSpaceGetNumberOfComponents(cs);
			// Rows are padded to full bytes - "bpc / 8" would be 0 below 8 bits
			bytesPerRow	= ((size_t)wd * comp * (size_t)bpc + 7) / 8;
			
			dprov = CGDataProviderCreateWithData(NULL, CFDataGetBytePtr(data),
												 CFDataGetLength(data), NULL);
			if (dprov != NULL) {
				newImg = CGImageCreate(wd, ht, bpc, bpc * comp, bytesPerRow,
									   cs, kCGImageAlphaNone, dprov,
									   NULL, FALSE, kCGRenderingIntentDefault);
				if (newImg != NULL) {
					ret = exportCGImageToTIFFile(newImg, (CFStringRef)path);
					CGImageRelease(newImg);
				}
				CGDataProviderRelease(dprov);
			}
			
			CGColorSpaceRelease(cs);
		}
		
	// JPEG or JPEG2000
	} else
		ret = [(NSData*)data writeToFile:path atomically:FALSE];
	
	// The provider only borrowed the bytes - released after the image is gone
	CFRelease(data);
	
	return ret;
}

#pragma mark -
#pragma mark Class interfaces

// Creates the (empty) file list and image table, then opens and parses the PDF.
// Returns nil - after releasing the instance - if the PDF could not be opened, or nil
// if the superclass initializer fails.
- (id)initWithFile:(NSString*)filePath fallbackEncoding:(NSStringEncoding)enc {
	if ((self = [super initWithFile:filePath fallbackEncoding:enc]) == nil)
		return nil;

	m_images	= [[NSMutableDictionary alloc] init];
	m_files		= [[NSMutableArray alloc] init];

	if ([self _openAndParsePDF])
		return self;

	// dealloc takes care of the collections created above
	[self release];
	return nil;
}

// Gives up the PDF document, the file list and the image table
- (void)dealloc {
	CGPDFDocumentRelease(m_document);
	[m_files release];
	[m_images release];

	[super dealloc];
}

// Returns the list of entries collected while parsing the PDF - one dictionary per
// image. The internal array itself is returned, not a copy.
- (NSArray*)filesInArchive {
	return m_files;
}

// Writes the image registered under the given filename to toPath.
// Raises an exception (named like the class) if the filename is unknown or the
// image could not be written.
- (void)extractFile:(NSString*)filename toFilePath:(NSString*)toPath {
	FFPDFImage* img = [m_images objectForKey:filename];

	// The translated text is passed as an argument and not as the format itself,
	// so a '%' inside a translation can't be taken for a format specifier
	if ((img == nil) || ![self _writeImage:img toPath:toPath])
		[NSException raise:[self className]
					format:@"%@", FFTR(@"Unsupported image type (e.g. colorspace) or an internal error")];
}

@end
