/* COSC-4P82-Final-Project/lib/lilgp/kernel/ckpoint.c
 *
 * (715 lines, 20 KiB, C)
 */

/* lil-gp Genetic Programming System, version 1.0, 11 July 1995
* Copyright (C) 1995 Michigan State University
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Douglas Zongker (zongker@isl.cps.msu.edu)
* Dr. Bill Punch (punch@isl.cps.msu.edu)
*
* Computer Science Department
* A-714 Wells Hall
* Michigan State University
* East Lansing, Michigan 48824
* USA
*
*/
#include <lilgp.h>
/* the process environment; only needed when USEVFORK is defined, where
 * write_checkpoint() hands it to execve() for the compression command. */
#ifdef USEVFORK
extern char **environ;
#endif
/* read_checkpoint()
*
* reads a checkpoint file, placing the generation number in gen and filling
* the population (and other structures) with information from the file
*/
void read_checkpoint ( char *filename, int *gen, multipop **mpop )
{
FILE *f;
char *buffer;
ephem_const **eind;
int random_state_bytes;
int i;
char *rand_state;
/* miscellaneous buffer for reading. */
buffer = (char *)MALLOC ( MAXCHECKLINELENGTH );
/* open the file. */
f = fopen ( filename, "rb" );
if ( f == NULL )
{
error ( E_FATAL_ERROR, "couldn't read checkpoint \"%s\".",
filename );
}
oprintf ( OUT_SYS, 30, "reading from checkpoint \"%s\".\n",
filename );
/** confirm the magic word that starts every checkpoint file. **/
fgets ( buffer, MAXCHECKLINELENGTH, f );
if ( strcmp ( buffer, CK_MAGIC ) )
error ( E_FATAL_ERROR,
"\"%s\" is not a lil-gp v1.0 checkpoint file.", filename );
/* skip the human-readable id line. */
fgets ( buffer, MAXCHECKLINELENGTH, f );
#ifdef DEBUG
printf ( "id line: %s", buffer );
#endif
/** read and print the timestamp. **/
fscanf ( f, "%*s " );
fgets ( buffer, MAXCHECKLINELENGTH, f );
/* chop the newline. */
buffer[strlen(buffer)-1] = 0;
oprintf ( OUT_SYS, 30, " checkpoint timestamp: [%s].\n", buffer );
/* skip the "section: global" line. */
fgets ( buffer, MAXCHECKLINELENGTH, f );
#ifdef DEBUG
printf ( "should be global section: %s", buffer );
#endif
/* read the generation number. */
fscanf ( f, "%*s %d\n", gen );
/** read the random number state encoded as a string of hex chars. **/
/* first read the length. */
fscanf ( f, "%*s %d ", &random_state_bytes );
#ifdef DEBUG
fprintf ( stderr, "%d random state bytes.\n", random_state_bytes );
#endif
/* allocate the buffer. */
rand_state = (char *)MALLOC ( random_state_bytes+1 );
/* read the hex data into the buffer. */
read_hex_block ( rand_state, random_state_bytes, f );
/* set the state. */
random_set_state ( &globrand, rand_state );
/* free the buffer. */
FREE ( rand_state );
/* slurp the newline character following the hex data. */
fgetc ( f );
/** skip the "section: parameter" line. **/
fgets ( buffer, MAXCHECKLINELENGTH, f );
#ifdef DEBUG
printf ( "should be parameter section: %s", buffer );
#endif
/* read the parameter database. */
read_parameter_database ( f );
/* make internal copies of function set(s). */
if ( app_build_function_sets() )
error ( E_FATAL_ERROR, "app_build_function_sets() failure." );
/** skip the "section: erc" line. **/
fgets ( buffer, MAXCHECKLINELENGTH, f );
#ifdef DEBUG
printf ( "should be erc section: %s", buffer );
#endif
/* read the list of ephemeral constants, and index them */
eind = read_ephem_list ( f );
/** skip the "section: erc" line. **/
fgets ( buffer, MAXCHECKLINELENGTH, f );
#ifdef DEBUG
printf ( "should be population section: %s", buffer );
#endif
/** read the population **/
/* allocate memory. */
*mpop = (multipop *)MALLOC ( sizeof ( multipop ) );
/* read number of subpops. */
fscanf ( f, "%*s %d\n", &((**mpop).size) );
/* allocate subpop list. */
(**mpop).pop = (population **)MALLOC ( (**mpop).size *
sizeof ( population * ) );
for ( i = 0; i < (**mpop).size; ++i )
{
/** skip each "subpop: #" line. **/
fgets ( buffer, MAXCHECKLINELENGTH, f );
#ifdef DEBUG
printf ( "should be subpop %d: %s", i, buffer );
#endif
/* read the population. */
(**mpop).pop[i] = read_population ( eind, f );
}
/** skip the "section: application" line. **/
fgets ( buffer, MAXCHECKLINELENGTH, f );
#ifdef DEBUG
printf ( "should be application section: %s", buffer );
#endif
/* read application-specific stuff. */
app_read_checkpoint ( f );
/** skip the "section: statistics" line. **/
fgets ( buffer, MAXCHECKLINELENGTH, f );
#ifdef DEBUG
printf ( "should be statistics section: %s", buffer );
#endif
/* read the statistics. */
read_stats_checkpoint ( *mpop, eind, f );
/* close'n'free. */
FREE ( eind );
FREE ( buffer );
fclose ( f );
oprintf ( OUT_SYS, 30, "population read from checkpoint \"%s\".\n",
filename );
}
/* write_checkpoint()
 *
 * checkpoints the population to the given file: magic word and id string,
 * timestamp, generation number, random number state, parameter database,
 * ERC list, every subpopulation, application data and statistics.
 *
 * the file is opened in binary mode so that it matches the "rb" mode
 * read_checkpoint() uses (read_checkpoint() compares the first line
 * against CK_MAGIC with strcmp(), so line endings must survive untouched).
 *
 * if the file can't be opened, or an error is detected while writing or
 * closing it, an E_ERROR is reported and the function returns without
 * attempting compression.
 *
 * if the "checkpoint.compress" parameter is set, it is used as a
 * printf-style format that is given the filename, and the resulting
 * command is run through vfork()/execve() or system(), depending on
 * which of USEVFORK / USESYSTEM is defined.
 */
void write_checkpoint ( int gen, multipop *mpop, char *filename )
{
    FILE *f;
    unsigned char *rand_state;
    ephem_index *eind;
    int i;
    int random_state_bytes;
    int write_failed;
    time_t now;
    char *param;
    char *compresscommand[4] = { NULL, NULL, NULL, NULL };

    /* open the file. */
    f = fopen ( filename, "wb" );
    if ( f == NULL )
    {
        error ( E_ERROR, "couldn't write checkpoint \"%s\"; skipping.",
                filename );
        return;
    }

    /* write magic number and id string. */
    fputs ( CK_MAGIC, f );
    fputs ( CK_IDSTRING, f );

    /* write timestamp. */
    time ( &now );
    fprintf ( f, "checkpoint-written: %s", ctime ( &now ) );

    /* global section. */
    fputs ( "section: global\n", f );
    fprintf ( f, "generation: %d\n", gen );

    /** write the state of the random number generator. **/
    rand_state = random_get_state ( &globrand, &random_state_bytes );
    fprintf ( f, "random-state: %d ", random_state_bytes );
    /* store buffer as hex data. */
    write_hex_block ( rand_state, random_state_bytes, f );
    fputc ( '\n', f );
    FREE ( rand_state );

    /** write the parameter database. **/
    fprintf ( f, "section: parameter\n" );
    write_parameter_database ( f );

    /** write the list of ephemeral constants, and index them. **/
    fprintf ( f, "section: erc\n" );
    eind = write_ephem_list ( f );

    /** write the population. **/
    fprintf ( f, "section: population\n" );
    fprintf ( f, "subpop-count: %d\n", mpop->size );
    for ( i = 0; i < mpop->size; ++i )
    {
        fprintf ( f, "subpop: %d\n", i );
        write_population ( mpop->pop[i], eind, f );
    }

    /** application-specific data. **/
    fprintf ( f, "section: application\n" );
    app_write_checkpoint ( f );

    /** statistics structures. **/
    fprintf ( f, "section: statistics\n" );
    write_stats_checkpoint ( mpop, eind, f );

    /** close'n'free. **/
    FREE ( eind );
    /* a full disk or similar shows up either in the stream's error flag
       or when fclose() flushes; check both before claiming success. */
    write_failed = ferror ( f );
    if ( fclose ( f ) != 0 )
        write_failed = 1;
    if ( write_failed )
    {
        error ( E_ERROR,
                "error while writing checkpoint \"%s\"; file is incomplete.",
                filename );
        return;
    }
    oprintf ( OUT_SYS, 20, " population checkpointed: \"%s\".\n",
              filename );

    /** do we compress the checkpoint file? **/
    param = get_parameter ( "checkpoint.compress" );
    if ( param )
    {
#if defined(USEVFORK) || defined(USESYSTEM)
        /* allocate a string big enough to hold the command.  the +1
           keeps room for the terminating NUL even when both strings are
           empty.  NOTE(review): param is used as the format string, so
           the size estimate assumes it contains a single %s -- confirm
           against the documented parameter format. */
        compresscommand[2] = (char *)MALLOC ( ( 2*(strlen(param) +
                                                    strlen(filename)) + 1 ) *
                                              sizeof ( char ) );
        /* create the command string. */
        sprintf ( compresscommand[2], param, filename );
#ifdef DEBUG
        oprintf ( OUT_SYS, 20, " compression command is [%s]\n",
                  compresscommand[2] );
#endif
#ifdef USEVFORK
        /* in unix (solaris at least), a system() call performs a fork(),
         * then an exec(). the fork system call copies the entire address
         * space of the parent process to the child process. for large
         * GP applications, this could be intolerably slow.
         *
         * vfork() does a fork without copying the address space. it can
         * be used when the child immediately exec()s following the
         * vfork().
         *
         * we use exec() to do a "/bin/sh -c compresscommand" to parse
         * and execute the compression command.
         *
         * we neither wait for the child to complete nor check the exit
         * status to see if the compression was successful.
         */
        /** create the rest of the argv[] array to pass to the child. */
        compresscommand[0] = "/bin/sh";
        compresscommand[1] = "-c";
        if ( !vfork() )
        {
            execve ( "/bin/sh", compresscommand, environ );
            _exit(1);
        }
#else
        /* this is provided for non-unix systems which don't provide the
         * vfork() call but do have the system() call.
         */
        system ( compresscommand[2] );
#endif
        FREE ( compresscommand[2] );
        oprintf ( OUT_SYS, 20, " checkpoint compressed.\n" );
#else
        /* neither vfork() nor system() is available;
           can't do compression. */
        oprintf ( OUT_SYS, 20, " checkpoint compression unavailable.\n" );
#endif
    }
}
/* read_population()
 *
 * builds a population from the checkpoint stream f and hands it back to
 * the caller.  the population structure and its array of individuals are
 * allocated here.  eind is the ERC index that read_individual() needs to
 * resolve "name:index" terminals.
 *
 * NOTE(review): the result of the fscanf() that reads the "size" and
 * "next" fields is not checked, so a malformed header leaves both fields
 * unset before size is used to allocate the array.
 */
population *read_population ( ephem_const **eind, FILE *f )
{
    population *result;
    char *scratch;
    int slot;

    result = (population *)MALLOC ( sizeof ( population ) );

    /* the two header lines carry the "size" and "next" fields. */
    fscanf ( f, "%*s %d\n%*s %d\n", &(result->size), &(result->next) );

    /* room for every individual announced by the header. */
    result->ind = (individual *)MALLOC ( result->size *
                                         sizeof ( individual ) );

    /* one scratch buffer for function names is shared by all calls to
       read_individual(), instead of allocating and freeing it once per
       individual. */
    scratch = (char *)MALLOC ( MAXCHECKLINELENGTH );

    slot = 0;
    while ( slot < result->size )
    {
        read_individual ( &(result->ind[slot]), eind, f, scratch );
        slot++;
    }

    FREE ( scratch );
    return result;
}
/* read_individual()
 *
 * reads a single individual from a checkpoint file into the given
 * individual pointer. it does NOT allocate the pointer, but it does
 * allocate the individual's array of tree_count trees.
 *
 * the hits count and the three fitness values are only present in the
 * file (and only stored into ind) when evald is EVAL_CACHE_VALID.
 *
 * buffer is scratch space handed on to read_tree_recurse() for parsing
 * function names.
 *
 * any field that cannot be parsed, and any mismatch between the tree
 * header and the tree actually read, is reported as a fatal
 * "checkpoint file corrupted" error.
 */
void read_individual ( individual *ind, ephem_const **eind, FILE *f,
                       char *buffer )
{
    int j, k[3];

    /* read the evald and flags fields. */
    if ( fscanf ( f, "%d %d ", &(ind->evald), &(ind->flags) ) != 2 )
        error ( E_FATAL_ERROR, "checkpoint file corrupted in population section." );

    if ( ind->evald == EVAL_CACHE_VALID )
    {
        /** if the individual has valid fitness values saved in the
            file, read them. **/
        /* skip over the human-readable fitness values and read the
           hits count.  the three skipped values are not counted in the
           return value, so success is exactly one assignment. */
        if ( fscanf ( f, "%*f %*f %*f %d ", &(ind->hits) ) != 1 )
            error ( E_FATAL_ERROR, "checkpoint file corrupted in population section." );
        /** the fitness values, which are double precision, are dumped out
            in hex so that no significant digits are lost. **/
        read_hex_block ( &(ind->r_fitness), sizeof(double), f );
        fgetc ( f );
        read_hex_block ( &(ind->s_fitness), sizeof(double), f );
        fgetc ( f );
        read_hex_block ( &(ind->a_fitness), sizeof(double), f );
        fgetc ( f );
    }
#ifdef DEBUG
    fprintf ( stderr, "%lf %lf %lf %d %d %d\n",
              ind->r_fitness,
              ind->s_fitness,
              ind->a_fitness,
              ind->hits,
              ind->evald,
              ind->flags );
#endif

    /* allocate the array of trees. */
    ind->tr = (tree *)MALLOC ( tree_count * sizeof ( tree ) );
    for ( j = 0; j < tree_count; ++j )
    {
        /* read the tree number, tree size, and tree node count.  all
           three must be present, otherwise k[] would be compared below
           without ever having been set. */
        if ( fscanf ( f, "%d %d %d ", k+0, k+1, k+2 ) != 3 )
            error ( E_FATAL_ERROR, "checkpoint file corrupted in population section." );
        /** read the tree into a generation space, then copy it to
            its final location. **/
        gensp_reset ( 0 );
        read_tree_recurse ( 0, eind, f, j, buffer );
        gensp_dup_tree ( 0, ind->tr+j );
#ifdef DEBUG_READTREE
        fprintf ( stderr, "file: %d %d %d here: %d %d %d\n",
                  k[0], k[1], k[2], j, ind->tr[j].size,
                  ind->tr[j].nodes );
        print_tree ( ind->tr[j].data, stderr );
#endif
        if ( k[0] != j ||
             k[1] != ind->tr[j].size ||
             k[2] != ind->tr[j].nodes )
        {
            /** if the values in the checkpoint file don't match the
                values of the tree we read, this is a problem. this, of
                course should never happen. **/
            error ( E_FATAL_ERROR, "checkpoint file corrupted in population section." );
        }
    }
}
/* read_tree_recurse()
 *
 * function to recursively read a tree from a checkpoint file.  one
 * function name is parsed from fil (using string as scratch space),
 * looked up in the function set of the given tree, and appended to
 * generation space "space"; then the function's children, if any, are
 * read the same way.
 *
 * NOTE(review): two hazards remain that need the caller's error
 * reporting to fix properly:
 *  - get_function_by_name() returns NULL for a name it cannot find, and
 *    f is dereferenced below without a check;
 *  - the "%s" conversion puts no bound on the name length, so string
 *    must be large enough for any token in the file.
 */
void read_tree_recurse ( int space, ephem_const **eind, FILE *fil, int tree,
                         char *string )
{
    function *f;
    int i, j;
    int c;
    ephem_const *ep;

    /* read up until a nonwhitespace character in file. the character is
       kept in an int while it is classified: isspace() is only defined
       for EOF and unsigned char values, and a plain char may be negative
       for bytes >= 0x80. */
    do
    {
        c = fgetc ( fil );
    }
    while ( c != EOF && isspace ( c ) );
    /* the nonwhitespace character is saved in string[0]. */
    string[0] = c;

    /* get the next character. */
    i = fgetc ( fil );
    if ( i == EOF || isspace(i) )
        /* if the next character is whitespace (or the file ends here),
           then string[0] is a one-character function name.
           null-terminate the string. */
        string[1] = 0;
    else
    {
        /** if the next character is not whitespace, then string[0]
            is either an open parenthesis or the first character of a
            multi-character function name. **/
        /* push the next character back. */
        ungetc ( i, fil );
        /* read the function name. skip over an open parenthesis, if there
           is one. */
        fscanf ( fil, "%s ", string+(string[0]!='(') );
    }
#ifdef DEBUG_READTREE
    fprintf ( stderr, "function name is [%s]\n", string );
#endif

    /* look up the function name in this tree's function set. if the
       function is an ERC terminal (the name is of the form "name:ERCindex"),
       then place the ERC address in ep. */
    f = get_function_by_name ( tree, string, &ep, eind );

    /* add an lnode to the tree. */
    gensp_next(space)->f = f;

    switch ( f->type )
    {
    case TERM_NORM:
    case TERM_ARG:
    case EVAL_TERM:
        break;
    case TERM_ERC:
        /* record the ERC address as the next lnode in the array. */
        gensp_next(space)->d = ep;
        break;
    case FUNC_DATA:
    case EVAL_DATA:
        /** recursively read child functions, no skip nodes needed. **/
        for ( i = 0; i < f->arity; ++i )
            read_tree_recurse ( space, eind, fil, tree, string );
        break;
    case FUNC_EXPR:
    case EVAL_EXPR:
        /** recursively read child functions, recording skip values. **/
        for ( i = 0; i < f->arity; ++i )
        {
            /* save an lnode for the skip value. */
            j = gensp_next_int ( space );
            /* read the child tree. */
            read_tree_recurse ( space, eind, fil, tree, string );
            /* figure out how big the child tree was, and save that
               number in the skip node. */
            gensp[space].data[j].s = gensp[space].used-j-1;
        }
        break;
    }
}
/* get_function_by_name()
 *
 * looks up a function name in the function set for the given tree and
 * returns a pointer to the matching entry, or NULL if there is none.
 *
 * string is modified in place: it is chopped at the first ':' or ')'.
 * a name of the form "name:index" carries an ERC index; if the function
 * found is an ERC terminal, eind[index] is stored in *ep and its f field
 * is pointed at the function.
 *
 * an ERC terminal whose name carries no numeric index is treated as not
 * found (NULL), rather than indexing eind with an unset value.
 *
 * NOTE(review): the length of eind is not available here, so an index
 * beyond the end of the table cannot be detected.
 */
function * get_function_by_name ( int tree, char *string, ephem_const **ep,
                                  ephem_const **eind )
{
    int i, k;
    int j = -1;             /* ERC index; -1 means "none given". */
    function_set *fs = fset+tree_map[tree].fset;

    k = strlen ( string );
    for ( i = 0; i < k; ++i )
    {
        if ( string[i] == ':' )
        {
            /* names of the form "name:index" are chopped at the colon,
               and the value of the index saved.  string[i+1] is at worst
               the terminating NUL, so the digit test stays in bounds. */
            string[i] = 0;
            if ( string[i+1] >= '0' && string[i+1] <= '9' )
                j = atoi ( string+i+1 );
            break;
        }
        else if ( string[i] == ')' )
        {
            /* chop the name at the first closing parenthesis, since we
               could be passed a string like "function))))" */
            string[i] = 0;
            break;
        }
    }

    /* find the string in the function set. */
    for ( i = 0; i < fs->size; ++i )
        if ( strcmp ( string, fs->cset[i].string ) == 0 )
        {
            if ( fs->cset[i].type == TERM_ERC )
            {
                /* an ERC without a usable index can't be resolved. */
                if ( j < 0 )
                    return NULL;
                /* if this is an ERC, lookup the saved index in the
                   eind table, and store the looked-up address in ep. */
                *ep = eind[j];
                (*ep)->f = fs->cset+i;
            }
            /* return a pointer to the function. */
            return fs->cset+i;
        }

    /* this, of course, should never happen. */
    return NULL;
}
/* write_population()
 *
 * dumps one population to the checkpoint stream f: first the "size" and
 * "next" header lines, then every individual in order.  eind is passed
 * through to write_individual().
 */
void write_population ( population *pop, ephem_index *eind, FILE *f )
{
    int n = 0;

    /* header: the two scalar fields of the population. */
    fprintf ( f, "size: %d\nnext: %d\n", pop->size, pop->next );

    /* body: one record per individual. */
    while ( n < pop->size )
    {
        write_individual ( &(pop->ind[n]), eind, f );
        n++;
    }
}
/* write_individual()
 *
 * emits one individual to the checkpoint stream f.  the first line holds
 * the evald and flags fields and, when evald is EVAL_CACHE_VALID, the
 * fitness values and hits count followed by the three fitness values as
 * hex blocks.  each of the tree_count trees then gets its own line.
 * eind is handed to write_tree_recurse() so ERC addresses can be written
 * as integer indices.
 */
void write_individual ( individual *ind, ephem_index *eind, FILE *f )
{
    void *fitness[3];
    lnode *cursor;
    int which;
    int t;

    /* evald and flags always lead the record. */
    fprintf ( f, "%d %d ", ind->evald, ind->flags );

    if ( ind->evald == EVAL_CACHE_VALID )
    {
        /* readable copy of the cached fitness values, for humans. */
        fprintf ( f, "%lf %lf %lf %d ",
                  ind->r_fitness, ind->s_fitness,
                  ind->a_fitness, ind->hits );

        /* exact copy of the same three values as hex, separated by
           single spaces, so no significant digits are lost. */
        fitness[0] = &(ind->r_fitness);
        fitness[1] = &(ind->s_fitness);
        fitness[2] = &(ind->a_fitness);
        for ( which = 0; which < 3; ++which )
        {
            if ( which > 0 )
                fputc ( ' ', f );
            write_hex_block ( fitness[which], sizeof(double), f );
        }
    }
    fputc ( '\n', f );

    /* one line per tree: number, size, node count, then the tree. */
    t = 0;
    while ( t < tree_count )
    {
        fprintf ( f, "%d %d %d ", t, ind->tr[t].size,
                  ind->tr[t].nodes );
        cursor = ind->tr[t].data;
        write_tree_recurse ( &cursor, eind, f );
        fputc ( '\n', f );
        t++;
    }
}
/* write_tree_recurse()
 *
 * writes the subtree starting at *l to the checkpoint stream, advancing
 * *l past everything it writes.  each node is written as a space followed
 * by its name; nodes with nonzero arity are wrapped in parentheses.  ERC
 * terminals are written as "name:index", where the index comes from
 * looking up the ERC address in eind.
 */
void write_tree_recurse ( lnode **l, ephem_index *eind, FILE *fil )
{
    function *fn = (**l).f;
    int has_skip_nodes;
    int child;

    /* separator, and an opening paren for anything that isn't a
       terminal. */
    fputc ( ' ', fil );
    if ( fn->arity != 0 )
        fprintf ( fil, "(" );

    /* step over the function node itself. */
    ++*l;

    if ( fn->type != TERM_ERC )
    {
        /* ordinary nodes are written by name only. */
        fprintf ( fil, "%s", fn->string );
    }
    else
    {
        /* an ERC is followed by a node holding its address; write the
           index of that address and step over the node. */
        fprintf ( fil, "%s:%d", fn->string,
                  lookup_ephem ( eind, (**l).d ) );
        ++*l;
    }

    /* the EXPR kinds carry a skip node in front of every child, which is
       stepped over; the DATA kinds store their children back to back.
       every other kind has no children to write. */
    has_skip_nodes = ( fn->type == FUNC_EXPR || fn->type == EVAL_EXPR );
    if ( has_skip_nodes ||
         fn->type == FUNC_DATA || fn->type == EVAL_DATA )
    {
        for ( child = 0; child < fn->arity; ++child )
        {
            if ( has_skip_nodes )
                ++*l;
            write_tree_recurse ( l, eind, fil );
        }
    }

    if ( fn->arity != 0 )
        fprintf ( fil, ")" );
}
/* write_hex_block()
 *
 * dumps n bytes starting at buf to the stream f as text, two lowercase
 * hex digits per byte with no separators.  nothing is written when n is
 * zero or negative.
 */
void write_hex_block ( void *buf, int n, FILE *f )
{
    unsigned char *cursor = (unsigned char *)buf;
    int remaining = n;

    while ( remaining > 0 )
    {
        fprintf ( f, "%02x", *cursor );
        ++cursor;
        --remaining;
    }
}
/* ck_hex_nibble()
 *
 * converts one hex character (0-9, a-f or A-F) to its value 0..15.
 * any other character counts as 0.
 */
static int ck_hex_nibble ( int c )
{
    if ( c >= '0' && c <= '9' )
        return c - '0';
    if ( c >= 'a' && c <= 'f' )
        return c - 'a' + 10;
    if ( c >= 'A' && c <= 'F' )
        return c - 'A' + 10;
    return 0;
}

/* read_hex_block()
 *
 * reads hex characters into a block of memory, the inverse of
 * write_hex_block().  exactly two characters are consumed from f for
 * each of the n bytes stored in buf; both lowercase and uppercase hex
 * digits are accepted.
 *
 * if the file ends before all n bytes have been read, reading stops and
 * the bytes that could not be read are set to zero, so buf never holds
 * values computed from EOF.
 */
void read_hex_block ( void *buf, int n, FILE *f )
{
    int i;
    unsigned char *b = (unsigned char *)buf;
    int hi, lo;

    for ( i = 0; i < n; ++i )
    {
        hi = fgetc ( f );
        if ( hi == EOF )
            break;
        lo = fgetc ( f );
        if ( lo == EOF )
            break;
        /* combine the two nibbles, high one first. */
        b[i] = (unsigned char)( ck_hex_nibble ( hi ) * 16 +
                                ck_hex_nibble ( lo ) );
    }

    /* zero whatever a short file left unread. */
    for ( ; i < n; ++i )
        b[i] = 0;
}