Home | Forum | Source | Research | Guide
Sign in to contribute to source. how it works
Component compress.Compressor:gzip by barry
expand copy to clipboardexpand
// Fixed-size leading part of a gzip member header.
// compress fills an instance and writes its dana.serial() bytes to the output;
// decompress and getFileName read that many bytes from the input and copy them
// over an instance, so the field order here is the on-disk byte order.
data GZHeader {
	// magic bytes: written as 31 and 139, and checked against those values on read
	byte ID1
	byte ID2
	// compression method; compress writes 8
	byte CM
// bit masks tested against / OR-ed into FLG
// (FLG_FTEXT and FLG_FHCRC are not referenced by the code in this component)
const byte FLG_FTEXT = 0x1
const byte FLG_FHCRC = 0x2
// an optional length-prefixed section follows the fixed header
const byte FLG_FEXTRA = 0x4
// a zero-terminated file name follows
const byte FLG_FNAME = 0x8
// a zero-terminated comment follows
const byte FLG_FCOMMENT = 0x10
	// flag bits, combined from the FLG_* masks above
	byte FLG
	// presumably the modification time of the original file; never set or read here -- TODO confirm
	int4 mtime
	// never set or read by this component
	byte XFL
	// compress writes 255
	byte OS
	}

// Wrapper around a single int4, used with dana.serial() by readInt4 and writeInt4
// to get at the individual bytes of a 4-byte integer.
data Int4 {
	int4 val
	}

// Wrapper around a single int2, used with dana.serial() by readInt2
// to get at the individual bytes of a 2-byte integer.
data Int2 {
	int2 val
	}

// Number of bytes requested from the input file per read in the compress and decompress loops.
const int BUF_SIZE = 5048

component provides Compressor:gzip requires compress.algorithm.StreamCompression:deflate, data.checksum.CRC32 crc32 {
	
	// Decodes a 4-byte buffer whose byte order is the reverse of Dana's serialised int4.
	// The bytes of buf are copied over the serialised form of an Int4 and reversed in
	// place; the function relies on the array from dana.serial() sharing storage with
	// the Int4, so the reversed bytes are what holder.val yields.
	int4 readInt4(byte buf[])
		{
		Int4 holder = new Int4()
		byte raw[] = dana.serial(holder)
		
		raw =[] buf
		
		//reverse the byte order: inner pair first, then the outer pair
		byte tmp = raw[1]
		raw[1] = raw[2]
		raw[2] = tmp
		
		tmp = raw[0]
		raw[0] = raw[3]
		raw[3] = tmp
		
		return holder.val
		}
	
	// Reads a 2-byte integer from fd at its current position, swapping the two bytes
	// before interpreting them as an int2.
	// The function relies on the array from dana.serial() sharing storage with the Int2.
	int2 readInt2(File fd)
		{
		Int2 holder = new Int2()
		byte raw[] = dana.serial(holder)
		
		//fetch exactly as many bytes as the serialised int2 occupies
		byte fromFile[] = fd.read(raw.arrayLength)
		raw =[] fromFile
		
		//exchange the two bytes to fix the byte order
		byte tmp = raw[0]
		raw[0] = raw[1]
		raw[1] = tmp
		
		return holder.val
		}
	
	// Writes val to fd as 4 bytes, in the reverse of Dana's serialised byte order
	// (the mirror operation of readInt4).
	void writeInt4(File fd, int4 val)
		{
		Int4 holder = new Int4()
		holder.val = val
		
		byte raw[] = dana.serial(holder)
		
		//reverse the byte order: inner pair first, then the outer pair
		byte tmp = raw[1]
		raw[1] = raw[2]
		raw[2] = tmp
		
		tmp = raw[0]
		raw[0] = raw[3]
		raw[3] = tmp
		
		fd.write(raw)
		}
	
	// Reads one length-prefixed section from fd: a 2-byte length (decoded by readInt2)
	// followed by that many bytes of payload. Returns only the payload.
	byte[] readOptionalHeader(File fd)
		{
		int2 payloadLength = readInt2(fd)
		byte payload[] = fd.read(payloadLength)
		
		return payload
		}
	
	// Reads bytes from fd one at a time until a zero byte or end of file is reached,
	// and returns the bytes read before the terminator (the terminator itself is
	// consumed from the file but not included in the result).
	// Returns null if no non-zero byte was read.
	char[] readToZero(File fd)
		{
		byte buf[] = null
		// start with a non-zero value so the loop condition passes on the first iteration
		byte b[] = 1
		while (!fd.eof() && b != 0)
			{
			b = fd.read(1)
			// append the byte just read; each append builds a new array from buf followed by b
			// NOTE(review): this copies the accumulated bytes on every iteration -- fine for short names/comments
			if (b != 0) buf = new byte[](buf, b)
			}
		
		return buf
		}
	
	// Compresses everything readable from ifd into ofd as one gzip member:
	// fixed header, optional zero-terminated file name, deflate stream, then a
	// trailer holding the CRC32 of the input and the size reported by ifd.getSize().
	// Always returns true.
	bool Compressor:compress(File ifd, File ofd, opt char filename[])
		{
		StreamCompression deflater = new StreamCompression:deflate()
		
		//fixed header: magic bytes, compression method 8, OS 255
		GZHeader hdr = new GZHeader()
		hdr.ID1 = 31
		hdr.ID2 = 139
		hdr.CM = 8
		hdr.OS = 255
		
		//announce the file name field in the flags before the header is written
		if (isset filename)
			hdr.FLG |= GZHeader.FLG_FNAME
		
		byte hdrBytes[] = dana.serial(hdr)
		ofd.write(hdrBytes)
		
		//optional file name, followed by its terminating zero
		if (isset filename)
			{
			ofd.write(filename)
			ofd.write(0)
			}
		
		//compressed payload, with the checksum accumulated over the uncompressed chunks
		int4 checksum
		byte plain[]
		byte deflated[]
		
		deflater.compressInit()
		
		while (!ifd.eof())
			{
			plain = ifd.read(BUF_SIZE)
			checksum = crc32.makeCRC(checksum, plain)
			//the second argument tells the compressor whether this was the last chunk
			deflated = deflater.compress(plain, ifd.eof())
			ofd.write(deflated)
			}
		
		deflater.compressEnd()
		
		//trailer: checksum, then input size
		writeInt4(ofd, checksum)
		writeInt4(ofd, ifd.getSize())
		
		return true
		}
	
	// Decompresses one gzip member, starting at ifd's current position, into ofd.
	// Reads the fixed header and every optional header field announced in FLG,
	// inflates the payload, then reads the trailer and verifies the CRC32.
	// Returns true on success. Throws an Exception when the header or trailer is
	// truncated, when the magic bytes do not match, or when the CRC check fails.
	bool Compressor:decompress(File ifd, File ofd)
		{
		//a gzip file can contain a list of compressed files (or "members"), one after the other;
		//only the member at the current read position is handled here
		
		StreamCompression alg = new StreamCompression:deflate()
		
		GZHeader mainHeader = new GZHeader()
		byte extraField[]
		char fileName[]
		char memberComment[]
		byte headerCRC[]
		byte copyBuf[] = dana.serial(mainHeader)
		
		//read the standard header
		byte buf[] = ifd.read(copyBuf.arrayLength)
		
		if (buf.arrayLength != copyBuf.arrayLength)
			{
			throw new Exception("insufficient header bytes (file corrupted?)")
			}
		
		copyBuf =[] buf
		
		//check for magic gzip header bytes
		if (mainHeader.ID1 == 31 && mainHeader.ID2 == 139)
			{
			//the optional fields are read only to move past them; their order is fixed by the format
			
			//read optional section, if present (kept apart from the file name)
			if ((mainHeader.FLG & GZHeader.FLG_FEXTRA) == GZHeader.FLG_FEXTRA)
				extraField = readOptionalHeader(ifd)
			
			//read filename, if present
			if ((mainHeader.FLG & GZHeader.FLG_FNAME) == GZHeader.FLG_FNAME)
				fileName = readToZero(ifd)
			
			//read comment, if present
			if ((mainHeader.FLG & GZHeader.FLG_FCOMMENT) == GZHeader.FLG_FCOMMENT)
				memberComment = readToZero(ifd)
			
			//skip the 2-byte header checksum, if present, so it is not fed to the
			//decompressor as payload (the checksum itself is not verified)
			if ((mainHeader.FLG & GZHeader.FLG_FHCRC) == GZHeader.FLG_FHCRC)
				headerCRC = ifd.read(2)
			
			//now comes the compressed content
			int4 crcCheck
			byte chunk[]
			
			alg.decompressInit()
			
			while (!ifd.eof() && alg.decompressStatus() == StreamCompression.DS_CONTINUE)
				{
				buf = ifd.read(BUF_SIZE)
				chunk = alg.decompress(buf)
				crcCheck = crc32.makeCRC(crcCheck, chunk)
				ofd.write(chunk)
				}
			
			int lcSize = alg.decompressEnd()
			
			//adjust the file read head to wherever the last compressed chunk actually ended
			ifd.setPos(ifd.getPos() - (buf.arrayLength - lcSize))
			
			//read CRC and ISIZE (original size of uncompressed data, mod 2^32);
			//both must be complete before they are byte-swapped
			byte crcBytes[] = ifd.read(4)
			byte isizeBytes[] = ifd.read(4)
			
			if (crcBytes.arrayLength != 4 || isizeBytes.arrayLength != 4)
				{
				throw new Exception("insufficient trailer bytes (file corrupted?)")
				}
			
			int4 crc = readInt4(crcBytes)
			
			//ISIZE is consumed so the read head ends after this member; it is not validated
			int4 isize = readInt4(isizeBytes)
			
			if (crc != crcCheck)
				{
				throw new Exception("CRC test failed")
				}
			}
			else
			{
			throw new Exception("header does not match gzip format")
			}
		
		return true
		}
	
	// Returns the file name stored in the gzip member header at ifd's current
	// position, or null when the header carries no file name.
	// The read position of ifd is restored to where it was on entry on every path,
	// including the error paths.
	// Throws an Exception when the header is truncated or the magic bytes do not match.
	char[] Compressor:getFileName(File ifd)
		{
		int opos = ifd.getPos()
		
		GZHeader mainHeader = new GZHeader()
		byte extraField[]
		char fileName[]
		byte copyBuf[] = dana.serial(mainHeader)
		
		//read the standard header
		byte buf[] = ifd.read(copyBuf.arrayLength)
		
		if (buf.arrayLength != copyBuf.arrayLength)
			{
			ifd.setPos(opos)
			
			throw new Exception("insufficient header bytes (file corrupted?)")
			}
		
		copyBuf =[] buf
		
		//check for magic gzip header bytes
		if (mainHeader.ID1 != 31 || mainHeader.ID2 != 139)
			{
			ifd.setPos(opos)
			
			throw new Exception("header does not match gzip format")
			}
		
		//skip the optional section, if present; it precedes the file name and must
		//not be mistaken for it
		if ((mainHeader.FLG & GZHeader.FLG_FEXTRA) == GZHeader.FLG_FEXTRA)
			extraField = readOptionalHeader(ifd)
		
		//read filename, if present (fileName stays null otherwise)
		if ((mainHeader.FLG & GZHeader.FLG_FNAME) == GZHeader.FLG_FNAME)
			fileName = readToZero(ifd)
		
		ifd.setPos(opos)
		
		return fileName
		}
	
	}
Revision history
To propose a new revision to this entity, use dana source put -uc your/new/version.dn -n compress.Compressor:gzip -m "reason for update" -u yourUsername
Version 1 (this version) by barry
Notes for this version: Standard Library Initialisation