Data Compression Methods

In this article, I will present some basically developed data-compression algorithms. Amid them are:

  • RLE (Runtime-Length Encoding) methods
  • Grayscale bitmaps compression class
  1. Including rle.cpp, there are two methods: CompressRLE and DecompressRLE. As parameters, these methods need input/output file names. The bit-length can't get over 127 bytes. The input file can be presented in any binary format.
  2. Quad.cpp—this file contains the grayscale matrix class; the grayscale method is commonly used in a fine matrix compression, and can be used for any purpose.
  3. quad.in—is a sample of input file and has its own format: %C%C..%C - n times, where n is the first byte in sequence.
/*************************************************/
/* 2003 (C) Hannibal Lecturer */
/* Grayscale compression methods demo */
/*************************************************/
/* Format of input file (cycle allowed): */
/* typedef struct { */
/* int intSize; */
/* char* szData; */
/* } tagInputGrayScaleMatrix; */
/*************************************************/



#include <stdio.h>
#include <string.h>
#include <math.h>


#define MAX_DIMENSION 16



class GrayScaleMatrix
{
public:
int Dimension;
int intTotalBlocksCompressed;
unsigned char Matrix [MAX_DIMENSION][MAX_DIMENSION];
unsigned char szCompressedData [MAX_DIMENSION*MAX_DIMENSION];

GrayScaleMatrix (int Dimension);
~GrayScaleMatrix ();
void DisplayMatrix (FILE* display);
int GetMatrixBit (int y, int x);
int GetQuadType (int y, int x, int intSize);
int CompressMatrix (FILE* fout, int y, int x, int intSize, int
intType);
int SetMatrixBit (int y, int x, int intValue);
int DecompressMatrix (FILE* fin, int intType);

private:
char szPathString [MAX_DIMENSION*8];
int intPathLen;
}

GrayScaleMatrix::GrayScaleMatrix (int Dimension)
{
int i,j;
this->Dimension = Dimension;
for (i=0;i<sizeof(szPathString);i++) szPathString[i] = 0;
intPathLen = 0;
intTotalBlocksCompressed = 0;

for (i=0;i<Dimension;i++)
for (j=0;j<(Dimension/8);j++) Matrix[i][j] = 0;
}

GrayScaleMatrix::~GrayScaleMatrix ()
{
int i,j;
for (i=0;i<MAX_DIMENSION;i++)
for (j=0;j<MAX_DIMENSION;j++) Matrix[i][j] = 0;
}


void GrayScaleMatrix::DisplayMatrix (FILE* display)
{
if (display==0) return;

int i,j;
for (i=1;i<=Dimension;i++)
{
for (j=1;j<=Dimension;j++) fprintf
(display,"%d",GetMatrixBit(i,j));
fprintf (display,"\r\n");
};
}


int GrayScaleMatrix::GetQuadType (int y, int x, int intSize)
{
int i,j;
int blRes = 0;

for (i=0;i<intSize;i++)
for (j=0;j<intSize;j++) if (GetMatrixBit (y+i,x+j)!=0) blRes =
1;

if (blRes)
{
for (i=0;i<intSize;i++)
for (j=0;j<intSize;j++) if (GetMatrixBit(y+i,x+j)!=1) return
(2);
return (1);
}
else return (0);

return (2);
}


int GrayScaleMatrix::SetMatrixBit (int y, int x, int intValue)
{
if (GetMatrixBit(y,x)==intValue) return (0);
switch (intValue)
{
case 0: { Matrix[y-1][(x-1)/8] ^= (unsigned char)pow(2,(x-1)%8);
return(1); }
case 1: { Matrix[y-1][(x-1)/8] |= (unsigned char)pow(2,(x-1)%8);
return(1); }
default: return(0);
}
return(0);
}

int GrayScaleMatrix::GetMatrixBit (int y, int x)
{
return ( (unsigned char) (Matrix[y-1][(x-1)/8] << (8-x))
>> 7 );
}

int GrayScaleMatrix::DecompressMatrix (FILE* fin, int intType)
{
int i,j;
int intChar;

if (fin!=0)
{
intTotalBlocksCompressed = 0;
while ((intChar = getc(fin))!=EOF)
for (i=0;i<intChar;i++)
szCompressedData [intTotalBlocksCompressed++] = getc(fin);
}

for (i=1;i<=Dimension;i++)
for (j=1;j<=Dimension;j++)
if (intTotalBlocksCompressed==0) SetMatrixBit(i,j,intType);
else SetMatrixBit (i,j,~intType);


for (i=0;i<intTotalBlocksCompressed;i++)
{
intChar = szCompressedData[i];
int intPos [8] = {0,0,0,0,0,0,0,0}, intTotal = 0;
int y = 1, x = 1, intSize = Dimension, k;

while (intChar>3)
{
intPos[intTotal] = intChar%4;
intChar /= 4;
intTotal++;
}
intPos [intTotal++] = intChar;

for (j=intTotal-1;j>=0;j--)
{
intSize /= 2;
y += (intPos[j]/2)*intSize;
x += (intPos[j]%2)*intSize;
}

for (k=y;k<y+intSize;k++)
for (j=x;j<x+intSize;j++)
{
SetMatrixBit (k,j,intType);
}
}
return (1);
}


int GrayScaleMatrix::CompressMatrix (FILE* fout, int y, int x,
int intSize, int intType)
{
int i,j;
int intSizeNew = intSize/2;

if (GetQuadType(y,x,intSize)==intType) return (0);

for (i=0;i<2;i++)
for (j=0;j<2;j++)
{
szPathString [intPathLen] = 2*i + j;
intPathLen++;
if (GetQuadType(y+i*intSizeNew,x+j*intSizeNew,intSizeNew)==2)
CompressMatrix
(fout,y+i*intSizeNew,x+j*intSizeNew,intSizeNew,intType);
else if
(GetQuadType(y+i*intSizeNew,x+j*intSizeNew,intSizeNew)==intType)
{
int k, intRes = 0;
for (k=intPathLen-1;k>=0;k--) intRes +=
szPathString[k]*pow(4,intPathLen-1-k);
szCompressedData [intTotalBlocksCompressed] = (unsigned
char)intRes;
intTotalBlocksCompressed++;
if (fout!=0) fprintf(fout,"%c",(unsigned char)intRes);
}
intPathLen--;
szPathString [intPathLen] = 0;
}

return (0);
}

int CompressQuadFile (char* strIn, char* strOut)
{
int chIn;
FILE* fin = fopen (strIn,"rb");
FILE* fout = fopen (strOut,"wb");

while ((chIn=getc(fin))!=EOF)
{
char chAdd;
int i, j;
GrayScaleMatrix* mtx1 = new GrayScaleMatrix (chIn);

for (i=0;i<chIn;i++)
for (j=0;j<chIn/8;j++) mtx1->Matrix[i][j] = getc(fin);
mtx1->DisplayMatrix(stdout);

mtx1->CompressMatrix (0,1,1,chIn,1);
fprintf (fout,"%c",(unsigned
char)mtx1->intTotalBlocksCompressed);
mtx1->CompressMatrix (fout,1,1,chIn,1);
}

fclose (fin);
fclose (fout);
return (1);
}

int main (int argc, char** argv)
{

if (argc!=4)
{
printf ("Grayscale matrix compression demo\n");
printf ("Usage: %s [-c|-d] <in>
<out>\n",argv[0]);
printf ("Options: -c - compress existing file\n");
printf (" -d - decompress existing file\n");
printf ("Essemple gratia: %s -c file1.bin
file1.out\n\n",argv[0]);
}
else if (!strcmp(argv[1],"-c")) CompressQuadFile
(argv[2],argv[3]);
else if (!strcmp(argv[1],"-d"))
{
FILE* fin = fopen (argv[2],"rb");
GrayScaleMatrix* mtx1 = new GrayScaleMatrix(MAX_DIMENSION);
mtx1->DecompressMatrix (fin,1);
printf ("\nDecomposed raw of data:\n");
mtx1->DisplayMatrix (stdout);
fclose (fin);
return (0);
}
else return (1);

return (0);
}

I hope these simple tools will be helpful for every data-compression methods researcher.

I appreciate your every improvement or note on any related topic—huanito@ok.kz.

Downloads

Download demo project - 4 Kb


Comments

  • buggy code

    Posted by tramanan on 08/10/2004 03:53am

    I cudn't evn compress 30k file, evry time it crashes. I feel this article should b removed, unless author fixes all these bugs. I think author must have copied from some sites.

    Reply
  • what's going here

    Posted by Legacy on 01/07/2004 12:00am

    Originally posted by: coder

    what are you trying to do? can you explain more. I got 8 bytes quad.in and 12 bytes compressed, 50% larger than original. Or I missed something?

    Reply
  • code looks inefficient

    Posted by Legacy on 05/24/2003 12:00am

    Originally posted by: flintridgeparkenfarker vonkerschnauzerheiden III

    Creating a new object instance for each byte of a file would eat up resources quick when working with large files. Allocating all that memory without cleaning up, not to mention the overhead of all the calculations taking place at once looks disastrous. I don't think I'll try this one. I've seen compression algorithms that were far more efficient IMHO.

    Reply
Leave a Comment
  • Your email address will not be published. All fields are required.

Top White Papers and Webcasts

  • This ESG study by Mark Peters evaluated a common industry-standard disk VTl deduplication system (with 15:1 reduction ratio) versus a tape library with LTO-5, drives with full nightly backups, over a five-year period.  The scenarios included replicated systems and offsite tape vaults.  In all circumstances, the TCO for VTL with deduplication ranged from about 2 to 4 times more expensive than the LTO-5 tape library TCO. The paper shares recent ESG research and lots more. 

  • Live Event Date: September 16, 2014 @ 11:00 a.m. ET / 8:00 a.m. PT Are you starting an on-premise-to-cloud data migration project? Have you thought about how much space you might need for your online platform or how to handle data that might be related to users who no longer exist? If these questions or any other concerns have been plaguing you about your migration project, check out this eSeminar. Join our speakers Betsy Bilhorn, VP, Product Management at Scribe, Mike Virnig, PowerSucess Manager and Michele …

Most Popular Programming Stories

More for Developers

Latest Developer Headlines

RSS Feeds