Code:
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <windows.h>
#include "zlib.h"
#pragma comment(lib,"zlib.lib")
// Forward declarations; the definitions follow main() further down.
// Returns 1 when ch (compared case-insensitively) is in the table of accepted characters, else 0.
int isvalidchar(char ch);
// Truncates buffer in place; type 0 additionally cuts at the first '$' found after index 0.
void stripjunk(char *buffer,char type);
// Returns the index of the first ch in cstr, or -1 when there is none.
int focc(char *cstr,char ch);
// Unpacks the compressed part of bglname into datname; returns 1 on success, 0 on failure.
int uncomp_bgl(char *bglname,char *datname);
// Converts datname into a .gls file (header + entries); returns 1 on success, 0 on failure.
int writegls(char *datname);
// Formatted GLS header: filled in by main() and written to the output by writegls().
char glsheader[1024];
// printf-style template for the GLS header. It contains exactly three %s
// fields, in this order: glossary title, author, description. Every other
// header line is fixed text. Lines end in CR LF.
char glsheadertemplate[]=
"### Glossary title:%s\r\n"
"### Author:%s\r\n"
"### Description:%s\r\n"
"### Source language:English\r\n"
"### Source alphabet:Default\r\n"
"### Target language:English\r\n"
"### Target alphabet:Default\r\n"
"### Browsing enabled?No\r\n"
"### Type of glossary:00000000\r\n"
"### Case sensitive words?0\r\n"
";gls generated by bglgls\r\n\r\n"
"### Glossary section:\r\n\r\n";
/*
 * Prints prompt, then reads one line from stdin into buf (capacity size).
 * The result is always NUL-terminated and never contains the line ending.
 * On EOF or a read error the field is left empty. If the typed line does
 * not fit, the excess is discarded so it cannot spill into the next prompt.
 */
static void readfield(const char *prompt,char *buf,int size) {
    int ch,complete;

    printf("%s",prompt);
    if(!fgets(buf,size,stdin)) {
        buf[0]=0;
        return;
    }
    complete=(strchr(buf,'\n')!=NULL);
    buf[strcspn(buf,"\r\n")]=0;     // only cuts when a line ending is really there
    if(!complete)
        while((ch=getchar())!=EOF && ch!='\n') { }
}

/*
 * Program entry point.
 *
 * Expects exactly one argument, the .bgl file to convert. Asks for the
 * author, title and description on stdin, formats the GLS header into the
 * global glsheader, derives "<input without extension>.dat" as the name of
 * the intermediate file, then runs uncomp_bgl() followed by writegls().
 *
 * Returns 0 on success and 1 on any failure (bad usage, bad file name,
 * uncompress or write error).
 */
int main(int argc,char **argv) {
    char szAuth[32];
    char szTitle[32];
    char szDescription[128];
    char datfname[128];
    char *base,*dot,*p;
    size_t stemlen;

    if(argc!=2) {
        printf("usage: bglgls.exe filename.bgl\n");
        return 1;
    }
    //>get input
    readfield("gls Author:",szAuth,(int)sizeof szAuth);
    readfield("gls Title:",szTitle,(int)sizeof szTitle);
    readfield("gls Description:",szDescription,(int)sizeof szDescription);
    // The template's %s fields are title, author, description - in that order.
    // The three fields are at most 31+31+127 characters, so together with the
    // fixed template text the result stays well inside glsheader[1024].
    sprintf(glsheader,glsheadertemplate,szTitle,szAuth,szDescription);
    //>set output filename: swap the extension of argv[1] for ".dat"
    // Only a '.' inside the last path component counts as the extension dot,
    // so "..\dir\x.bgl" and "my.dict.bgl" keep their directory and stem.
    base=argv[1];
    for(p=argv[1];*p;p++)
        if(*p=='\\'||*p=='/'||*p==':') base=p+1;
    dot=strrchr(base,'.');
    if(!dot) { printf("invalid filename\n"); return 1; }
    stemlen=(size_t)(dot-argv[1]);
    if(stemlen+sizeof ".dat">sizeof datfname) { printf("filename too long\n"); return 1; }
    memcpy(datfname,argv[1],stemlen);
    strcpy(datfname+stemlen,".dat");
    //>>
    if(!uncomp_bgl(argv[1],datfname)) { printf("error uncompressing BGL.\n"); return 1; }
    if(!writegls(datfname)) { printf("error writing GLS.\n"); return 1; }
    return 0;
}
/*
 * uncompression routine
 *
 * Reads the single byte at offset 5 of bglname and uses it as the file
 * offset at which the compressed stream starts. Everything from that offset
 * to the end of the file is copied into a temporary file, which is then
 * read back through zlib's gz* API and written, uncompressed, to datname.
 *
 * bglname - path of the input file (opened "rb")
 * datname - path of the output file (created/overwritten, "wb")
 *
 * Returns 1 on success, 0 on any failure. All streams are closed and the
 * temporary file is deleted on every path.
 */
int uncomp_bgl(char *bglname,char *datname) {
    FILE *fin=NULL;             // the .bgl input
    FILE *ftmp=NULL;            // temporary copy of the compressed stream
    FILE *fout=NULL;            // uncompressed output
    void *gz=NULL;              // zlib read handle; an opaque pointer, deliberately
                                // not kept in a FILE * since it is not a stdio stream
    char iobuff[128];
    char tmppath[261];          // GetTempPath may need MAX_PATH+1 characters
    char tmpfname[260];         // GetTempFileName expects a MAX_PATH-sized buffer
    unsigned char zptrbyte;
    unsigned long pathlen;
    size_t nread;
    int tread;
    int havetmp=0;
    int ok=0;

    //get temp filename
    pathlen=GetTempPath((unsigned long)sizeof tmppath,tmppath);
    if(pathlen==0||pathlen>=sizeof tmppath) return 0;
    if(!GetTempFileName(tmppath,"bgl",0,tmpfname)) return 0;
    havetmp=1;                  // with uUnique==0 the file now exists on disk

    //> copy the compressed tail of the .bgl into the temp file
    fin=fopen(bglname,"rb");
    if(!fin) goto done;
    ftmp=fopen(tmpfname,"wb");
    if(!ftmp) goto done;
    if(fseek(fin,0x5,SEEK_SET)!=0) goto done;
    if(fread(&zptrbyte,sizeof(char),1,fin)!=1) goto done;
    printf("zlib header@0x%X\n",zptrbyte);
    if(fseek(fin,zptrbyte,SEEK_SET)!=0) goto done;
    // Loop on the fread result rather than feof(): a read error never sets
    // the EOF flag, so a feof()-driven loop would spin forever.
    while((nread=fread(iobuff,sizeof(char),sizeof iobuff,fin))>0)
        if(fwrite(iobuff,sizeof(char),nread,ftmp)!=nread) goto done;
    if(ferror(fin)) goto done;
    fclose(fin);
    fin=NULL;
    tread=fclose(ftmp);         // flushes; a failure here means a short temp file
    ftmp=NULL;
    if(tread!=0) goto done;

    //>>uncompressing >
    fout=fopen(datname,"wb");
    gz=gzopen(tmpfname,"rb");
    if(!fout||!gz) goto done;
    // gzread returns -1 on error; that must never reach fwrite as a length.
    while((tread=gzread(gz,iobuff,(unsigned)sizeof iobuff))>0)
        if(fwrite(iobuff,sizeof(char),(size_t)tread,fout)!=(size_t)tread) goto done;
    if(tread<0) goto done;
    ok=1;

done:
    if(gz) gzclose(gz);
    if(fout && fclose(fout)!=0) ok=0;
    if(ftmp) fclose(ftmp);
    if(fin) fclose(fin);
    if(havetmp) DeleteFile(tmpfname); //get rid of temporary file
    return ok;
}
/*
 * write gls
 *
 * Creates "<datname without extension>.gls", writes the global glsheader to
 * it and then walks the records of datname:
 *   - the high nibble of each record's first byte gives the record length:
 *     values >= 4 mean (nibble - 4); otherwise (nibble + 1) big-endian
 *     length bytes follow;
 *   - records whose low nibble is 1 are treated as entries: a one-byte
 *     length + headword, then a two-byte big-endian length + definition.
 *     Entries whose headword does not start with a letter are skipped.
 *     Both strings go through stripjunk() and are written as
 *     "headword\r\n" "definition\r\n\r\n". Definitions are cut to 1019 bytes;
 *   - every other record type is skipped.
 * A record that ends prematurely is treated as the end of the data.
 *
 * On success datname is deleted and the number of written terms is printed.
 * Returns 1 on success, 0 on failure (name too long, file cannot be opened,
 * read or write error).
 */
int writegls(char *datname) {
    FILE *fdic=NULL,*fgls=NULL;
    int ix,ok=0;
    unsigned long rec_length;   // up to 4 length bytes: needs 32 unsigned bits
    unsigned int lenword;       // 0..65535; a short would go negative above 32767
    unsigned char hdr,high_nibble,lenbyte;
    unsigned char lenmul,lenadd;
    long datapos;
    char word[260];             // <=255 bytes of headword + "\r\n" + NUL
    char tmpbuff[1024];         // <=1019 bytes of definition + "\r\n\r\n" + NUL
    char glsf[256];
    char *base,*dot,*p;
    size_t stemlen,len;
    int tt=0,lt=0;

    //gls filename: replace the extension (last '.' of the last path component)
    base=datname;
    for(p=datname;*p;p++)
        if(*p=='\\'||*p=='/'||*p==':') base=p+1;
    dot=strrchr(base,'.');
    stemlen=dot?(size_t)(dot-datname):strlen(datname);
    if(stemlen+sizeof ".gls">sizeof glsf) return 0;
    memcpy(glsf,datname,stemlen);
    strcpy(glsf+stemlen,".gls");
    printf("gls filename:%s\n",glsf);

    // open the input first so a missing .dat does not leave a stub .gls behind
    fdic=fopen(datname,"rb");
    if(!fdic) return 0;
    fgls=fopen(glsf,"wb");
    if(!fgls) { fclose(fdic); return 0; }

    //>write header
    printf("writing GLS");
    len=strlen(glsheader);
    if(fwrite(glsheader,sizeof(char),len,fgls)!=len) goto done;

    //>>parsing
    while(fread(&hdr,sizeof(char),1,fdic)==1) {
        //get record size
        high_nibble=hdr >> 4;
        if(high_nibble>=4) rec_length=high_nibble-4;
        else {
            rec_length=0;
            for(ix=0;ix<high_nibble+1;ix++) {
                if(fread(&lenbyte,sizeof(char),1,fdic)!=1) goto eod;
                rec_length=rec_length*256+lenbyte;
            }
        }
        datapos=ftell(fdic);
        if(datapos<0) goto done;
        // a length that would carry the file position past what a long can
        // hold cannot be a real record: stop instead of seeking to garbage
        if(rec_length>0x7FFFFFFFUL-(unsigned long)datapos) goto eod;

        if((hdr & 0xF)==1) {
            if(fread(&lenbyte,sizeof(char),1,fdic)!=1) goto eod;
            memset(word,0,sizeof word);
            if(fread(word,sizeof(char),lenbyte,fdic)!=lenbyte) goto eod;
            // <ctype.h> functions need an unsigned char value
            if(isalpha((unsigned char)word[0])) {
                if(fread(&lenmul,sizeof(char),1,fdic)!=1) goto eod;
                if(fread(&lenadd,sizeof(char),1,fdic)!=1) goto eod;
                lenword=lenmul*256u+lenadd;
                if(lenword>1019) lenword=1019;
                memset(tmpbuff,0,sizeof tmpbuff);
                if(fread(tmpbuff,sizeof(char),lenword,fdic)!=lenword) goto eod;

                // both parts are complete: emit the entry
                stripjunk(word,0);
                strcat(word,"\r\n");
                stripjunk(tmpbuff,1);
                strcat(tmpbuff,"\r\n\r\n");
                len=strlen(word);
                if(fwrite(word,sizeof(char),len,fgls)!=len) goto done;
                len=strlen(tmpbuff);
                if(fwrite(tmpbuff,sizeof(char),len,fgls)!=len) goto done;
                if(tt-100==lt) { lt=tt; printf("."); }   // progress dots
                tt++;
            }
        }
        // always continue at the start of the next record, however much of
        // this one was consumed
        if(fseek(fdic,datapos+(long)rec_length,SEEK_SET)!=0) goto done;
    }
eod:
    ok=!ferror(fdic);           // plain end of data is fine, a read error is not
done:
    fclose(fdic);
    if(fclose(fgls)!=0) ok=0;   // fclose flushes: a failure means lost output
    if(!ok) return 0;
    DeleteFile(datname); //we dont need the *.dat anymore..
    printf("%d terms written to file!\n",tt);
    return 1;
}
/*
 * find occurrence
 *
 * Returns the zero-based index of the first character in cstr equal to ch,
 * or -1 when cstr does not contain it. The terminating NUL is never
 * matched, so focc(s, 0) is always -1.
 */
int focc(char *cstr,char ch) {
    int ix;
    // walk to the terminator once instead of calling strlen() per iteration
    for(ix=0;cstr[ix]!='\0';ix++)
        if(cstr[ix]==ch) return ix;
    return -1;
}
/*
 * Cuts buffer short, in place, at the first unwanted character.
 *
 * type == 0: the first '$' at index 1 or later ends the string, and only
 *            the part before it is checked further.
 * any type:  the string is ended at the first character for which
 *            isvalidchar() returns 0.
 */
void stripjunk(char *buffer,char type) {
    int limit=strlen(buffer);
    int pos;

    if(type==0) {
        pos=1;
        while(pos<limit && buffer[pos]!='$')
            pos++;
        if(pos<limit)
            buffer[pos]=0;
        limit=pos;
    }

    for(pos=0;pos<limit;pos++) {
        if(isvalidchar(buffer[pos]))
            continue;
        buffer[pos]=0;
        return;
    }
}
/*
 * valid term/definition char
 *
 * Returns 1 when ch, lower-cased, appears in the table of accepted
 * characters (letters, digits, space and a set of punctuation), else 0.
 * NUL and every byte outside the table - including all bytes >= 0x80 -
 * yield 0.
 *
 * NOTE(review): the table has "&8()" where the keyboard row would give
 * "&*()", so '*' (and '^') are rejected. This looks like a typo but is
 * kept as-is because it changes the output - confirm before altering.
 */
int isvalidchar(char ch) {
    static const char valtab[]="abcdefghijklmnopqrstuvwxyz 0123456789!@#$%&8()_-+=|{}[]<>\"',.%%/\\:;!?";
    // tolower() is only defined for unsigned char values and EOF; a plain
    // char holding a byte >= 0x80 may be negative.
    int lc=tolower((unsigned char)ch);

    if(lc==0) return 0;         // strchr() would otherwise match the terminator
    return strchr(valtab,lc)!=NULL;
}
I have tested it with the code_analysis BGL that you suggested and it now works perfectly. I have also tested it with Babylon's english_english dictionary (since it is the largest, at 18 MB unpacked) and it works really well.
Bookmarks