--- autoclass-3.3.6.orig/changelog
+++ autoclass-3.3.6/changelog
@@ -0,0 +1,1568 @@
+
+ AUTOCLASS C VERSION 3.3.6 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1.
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+ Update "G_ac_version" to 3.3.6.
+
+ 2. autoclass-c/prog/utils.c -
+ Comment out the last error check in safe_sprintf to prevent errors of this type:
+ "ERROR: vsprintf produced 41 chars (max number is 40) -- called by log_transform
+ Abort".
+
+ 3. autoclass-c/prog/intf-reports.c -
+ Reworked the placement of '#' characters for the 'comment_data_headers_p = true'
+ report setting. Attribute names for both real and discrete attributes can now
+ be of arbitrary length and remain on the same line, i.e. they will not be split
+ onto two lines.
+
+ 4. autoclass-c/prog/io-read-data.c -
+ Print standard deviation, rather than variance, for input data summary of real
+ attributes in the .log file. (output_att_statistics & output_real_att_statistics)
+
+ AUTOCLASS C VERSION 3.3.5 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1.
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+ Update "G_ac_version" to 3.3.5.
+
+ 2. autoclass-c/load-ac, autoclass-c/prog/autoclass.make.freebsd.gcc -
+ Add support for the FreeBSD unix variant operating system.
+
+ 3. autoclass-c/prog/model-multi-normal-cn.c -
+ Replace all calls to log with safe_log, which checks for a zero
+ argument. Certain real valued data set values caused a
+ double precision underflow (< e-308) which resulted in 0.0.
+
+ 4. autoclass-c/prog/intf-reports.c, utils-math.c, search-control-2.c,
+ model-update.c, model-transforms.c, model-single-normal-cm.c,
+ model-single-normal-cn.c, model-single-multinomial.c,
+ model-expander-3.c -
+ Make the change in item 3. to all files referencing log.
+
+ 5. autoclass-c/sample/read.me.c -
+ Correct file name typo: scriptc.lisp => scriptc.text
+
+ 6. autoclass-c/load-ac -
+ To prevent bad default .cshrc files from crashing the build,
+ change "#!/bin/csh" to "#!/bin/csh -f".
+
+ 7. autoclass-c/prog/io-results.c -
+ write_att_DS modified to output warnings_and_errors->num_expander_warnings
+ and warnings_and_errors->num_expander_errors strings with embedded
+ carriage returns removed. This corrects a problem which occurs
+ when the user's data generates warning messages during input checking,
+ which the user ignores, and the user has specified save_compact_p = false
+ and read_compact_p = false in their .s-params file. When they
+ attempt to create reports, "autoclass -reports ..." breaks with
+ an unexpected data error.
+
+ 8. autoclass-c/load-ac-macosx, autoclass-c/prog/autoclass.make.macosx.gcc -
+ Add support for the Macintosh OSX 10.4 operating system utilizing
+ gcc 4.0. (OSFLAGS=-DMACOSX)
+
+ 9. autoclass-c/prog/utils.c, autoclass-c/prog/autoclass.h -
+ Routine "int round(double)" replaced by "int iround(double)".
+ References to "round" were changed to "iround" in all affected routines.
+
+ 10. autoclass-c/prog/autoclass.h -
+ For MacOSX, do not define INFINITY here -- conflicts with OSX math library.
+
+
+ AUTOCLASS C VERSION 3.3.4 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1. autoclass-c/sample files were regenerated because of the
+ SAFE_LOG change (item 6. below). Only very minor changes
+ occurred.
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+ Update "G_ac_version" to 3.3.4.
+
+ 2. autoclass-c/prog/predictions.c -
+ In autoclass_predict, allocate separate storage for
+ test_clsf->reports->class_wt_ordering to prevent
+ segmentation violation on Linux platforms when running
+ in predict mode.
+
+ 3. autoclass-c/prog/autoclass.h, minmax.h -
+ Macros min() and max() have been moved to a new file:
+ minmax.h.
+
+ Added `#include "minmax.h"' to the following files:
+ intf-reports.c io-read-data.c matrix-utilities.c
+ model-multi-normal-cn.c model-single-normal-cm.c
+ model-single-normal-cn.c model-update.c
+ search-basic.c search-control-2.c search-control.c
+ statistics.c struct-data.c utils.c
+
+ Removed the prototypes for build_sn_cm_priors() and
+ build_sn_cn_priors(). These functions are used only
+ in the .c files that contain them, so are now static
+ functions.
+
+ Changed the prototype for log_gamma(), for reasons
+ explained below.
+
+ 4. autoclass-c/prog/getparams.c -
+ Corrected argument to sizeof() on line 142.
+
+ 5. autoclass-c/prog/struct-clsf.c -
+ Zero global pointer and counter variables after deleting the
+ structures to which they refer.
+
+ 6. autoclass-c/prog/utils-math.c -
+ Before, the function safe_log() returned 0.0 when its
+ argument was less than or equal to LEAST_POSITIVE_SINGLE_FLOAT.
+ This is clearly wrong. Log(x) approaches -infinity (not 0)
+ as x approaches 0. The fix is to have safe_log() return
+ LEAST_POSITIVE_SINGLE_LOG for x near 0.
+
+ 7. autoclass-c/prog/search-control-2.c -
+ In variance, check for lists of length less than 2,
+ and return 0.
+
+
+ Items 3 - 7 were submitted by Jack Wathey <wathey@salk.edu>.
+
+
+ 8. autoclass-c/prog/intf-reports.c -
+ Correct FORMAT_DISCRETE_ATTRIBUTE to prevent string overrun and
+ segmentation violations when single multinomial values exceed
+ 20 characters, while running in report mode.
+
+ 9. autoclass-c/prog/io-results.c -
+ Correct READ_ATT_DS to prevent string overrun and
+ segmentation violations when single multinomial values exceed
+ 40 characters, while running in report mode.
+
+
+
+
+
+
+
+ AUTOCLASS C VERSION 3.3.3 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1. autoclass-c/doc/reports-c.text - document that report log messages
+ will go into a ".rlog" file, rather than the ".log" which is
+ used during search runs. Also minor typos corrected.
+
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+ Update "G_ac_version" to 3.3.3.
+
+ 2. autoclass-c/prog/init.c, intf-reports.c, intf-sigma-contours.c -
+ Sun Solaris CC compiler breaks when #ifdef, etc preprocessor
+ directives do not start in column 1. All preprocessor
+ directives now start in column 1.
+
+ 3. autoclass-c/prog/autoclass.make.alpha.cc,
+ autoclass-c/load-ac -
+ A Makefile for the Dec Alpha (OSF1 v4.0) has been added.
+
+ 4. autoclass-c/prog/prints.c -
+ Modified PRINT_VECTOR_F to eliminate compiler warning.
+
+ 5. autoclass-c/prog/search-control.c -
+ Conditionalize two warning tests to fail in batch mode
+ (.s-params parameter interactive_p = false), rather than
+ attempt to ask the user whether to proceed.
+
+ 6. autoclass-c/prog/autoclass.h, autoclass.c, io-results.c -
+ To make it convenient to generate reports while the search
+ is still running, so you can decide whether or not to stop the
+ search, but not have the search log file be overwritten with the
+ report log file, the report log file will now be written to a
+ file with the extension ".rlog". The search output will continue
+ to be directed to a file with the extension ".log".
+
+ 7. autoclass-c/prog/getparams.c, init.c, io-read-model.c, struct-class.c -
+ Change sizeof(int) to sizeof(void *), so that 64-bit architectures
+ will be handled properly. This corrects the core dump which occurs
+ on Dec Alpha platforms at the end of each search or reports run, when
+ AutoClass C attempts to free allocated storage.
+
+
+
+
+
+
+
+
+
+
+ AUTOCLASS C VERSION 3.3.2 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1. autoclass-c/doc/search-c.text -
+ Add a paragraph to section 8.0 SEARCH VARIATIONS discussing
+ how running AutoClass in prediction mode can indicate whether
+ you currently have a well converged classification.
+
+ Add a paragraph to section 11.0 JUST HOW SLOW IS IT? discussing
+ how to deal with very large data sets.
+
+ 2. autoclass-c/doc/preparation-c.text -
+ Add section 1.3.1.1 HANDLING MISSING VALUES, which discusses
+ AutoClass C's approach to dealing with missing values in the
+ input data set.
+
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+
+ Update "G_ac_version" to 3.3.2.
+
+ 2. autoclass-c/prog/autoclass.h, intf-reports.c -
+ In AUTOCLASS_REPORTS, write the default and overridden parameters
+ from the .r-params file to the log file. Add error checking for
+ report_type.
+
+ In CASE_CLASS_DATA_SHARING, correct problem where "-predict" mode
+ ignores report_type options of "xref_case" and "xref_class".
+
+ 3. autoclass-c/prog/autoclass.c -
+ In MAIN, initialize log file for "-predict" mode. Prior to this
+ change, there were no log entries for "-predict" mode runs.
+
+ 4. autoclass-c/prog/io-read-data.c -
+ In CHECK_ERRORS_AND_WARNINGS, write warnings and errors to log file.
+ In the situations of restarting a classification search, running a
+ classification report, or running a classification prediction,
+ warning and error messages which have until now gone only to the
+ screen, will now go into the log file.
+
+ 5. autoclass-c/prog/intf-reports.c, predictions.c -
+ In AUTOCLASS_REPORTS, pass log file arguments to AUTOCLASS_PREDICT,
+ so that errors and warnings generated in processing the test
+ classification will be written to the log file.
+
+ 6. autoclass-c/prog/model-transforms.c -
+ In LOG_TRANSFORM, add "Suggest decreasing attribute's rel_error." to
+ error message: "log transform of attribute# 5 using mn -120.398972
+ rather than 0.000000 for zero_point."
+
+
+======================================================================
+
+
+
+ AUTOCLASS C VERSION 3.3.1 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1.
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+
+ Update "G_ac_version" to 3.3.1.
+
+ 2. autoclass-c/prog/io-results.c, io-results-bin.c -
+ In READ_CLSF_SEQ and LOAD_CLSF_SEQ, check for win/unx suffix
+ in ac_version before stripping it off. This corrects an
+ incompatibility with .results[-bin] files written by AutoClass C
+ versions prior to version 3.3. The error, for .results-bin files,
+ looks like this: ERROR: expecting "ac_version n.n", found "ac_version ";
+ Abort.
+
+
+
+======================================================================
+
+
+
+ AUTOCLASS C VERSION 3.3 NOTES
+======================================================================
+======================================================================
+
+NOTE: This version is an integrated source port of version 3.2.2 to
+ Windows NT/95. There are no new capabilities or bug fixes over
+ and above version 3.2.2.
+
+Windows Compatibility Changes:
+-----------------------------------
+ Thanks to Autumn <autumn@netron.com>, we now have an integrated
+ source release of AutoClass C for Unix platforms and Windows
+ platforms (requires Microsoft Visual C++ 5.0). Due to the Unix
+ "line feed" and the Windows "carriage return/line feed" incompatibility,
+ there are two distributions, one for Unix-based platforms, and one
+ for Windows platforms.
+
+ Summary of source changes:
+ autoclass-c/prog/*.c -
+ Using _MSC_VER in preprocessor forms, remove the include of
+ Unix-specific headers, and add the Win32 equivalent.
+ autoclass-c/prog/autoclass.h -
+ Use rand in place of lrand48, therefore change srand48 to srand.
+ autoclass-c/prog/getparams.c, getparams.h, intf-reports.c, search-control.c
+ Prefixed enum members with T so they would not clash with predefined types.
+ autoclass-c/prog/init.c
+ getcwd is called _getcwd in MSVC.
+ autoclass-c/prog/model-expander-3.c, params.h, struct-class.c -
+ Prefixed enum member IGNORE with T so it would not clash with
+ predefined type.
+ autoclass-c/prog/search-control.c -
+ Made two queries conditional on interactive_p; moved init of stream
+ with stdout -- MSVC doesn't think it's a constant.
+ autoclass-c/prog/utils.c -
+ Created an lrand48 for win32; created a char_input_test() that works
+ with win32.
+ autoclass-c/prog/fcntlcom-ac.h -
+ Adapt for MSVC.
+
+
+Documentation:
+------------------------------
+
+ 1. Update sample AutoClass C run files contained in autoclass-c/sample.
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+
+ Update "G_ac_version" to 3.3unx or 3.3win, depending on which
+ platform AutoClass C is compiled.
+
+ 2. autoclass-c/prog/globals.c, globals.h, intf-extensions.c, intf-reports.c,
+ search-control-2.c, search-control.c, io-results-bin.c,
+ io-results.c, io-read-model.c, & io-read-data.c -
+
+ Add G_slash, which is "/" for Unix, and "\" for Windows.
+ Change all occurrences of local variable "slash" to "G_slash".
+ This will allow AutoClass C to handle both Unix and Windows
+ relative and absolute pathnames properly.
+
+ 3. autoclass-c/prog/init.c -
+
+ In INIT, append either "/" or "\" to G_absolute_pathname.
+
+ 4. autoclass-c/prog/intf-sigma-contours.c -
+
+ In GENERATE_SIGMA_CONTOURS use %+13e for Windows instead of
+ %13e in order to maintain column alignment for positive and
+ negative values.
+
+ 5. autoclass-c/prog/intf-reports.c -
+
+ For Windows, use %+9.2e rather than %9.2e in FORMAT_REAL_ATTRIBUTE
+ and FORMAT_DISCRETE_ATTRIBUTE, in order to maintain column alignment
+ for positive and negative values.
+
+ Call FILTER_E_FORMAT_EXPONENTS to filter Windows peculiar %e format
+ output of e+000 => e+00, and e-000 => e-00.
+
+ 6. autoclass-c/prog/intf-sigma-contours.c -
+
+ For Windows, use %+13e rather than %13e in GENERATE_SIGMA_CONTOURS,
+ in order to maintain column alignment for positive and negative values.
+
+ Call FILTER_E_FORMAT_EXPONENTS to filter Windows peculiar %e format
+ output of e+000 => e+00, etc, and e-000 => e-00.
+
+
+ 7. autoclass-c/prog/io-results.c, io-results-bin.c -
+
+ In READ_CLSF_SEQ and LOAD_CLSF_SEQ, truncate unx/win from the version
+ prior to checking for numeric content.
+
+ 8. autoclass-c/prog/autoclass.c -
+ In AUTOCLASS_ARGS, display "autoclass" or "Autoclass.exe" depending
+ on platform.
+
+======================================================================
+
+
+
+ AUTOCLASS C VERSION 3.2.2 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1. autoclass-c/doc/search-c.text - clarify the usage of the
+ RECONVERGE_TYPE parameter.
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+
+ Update "G_ac_version" to 3.2.2.
+
+ 2. autoclass-c/data/uci-dbs-readme.text -
+ Replaced out-of-date information with current Web pointer.
+
+ 3. autoclass-c/data/tests.c &
+ autoclass-c/data/glass/ report files
+ Version 3.2 contained changes to the multi-normal-cn model
+ which changed slightly the results of the non-random test
+ cases. They have been updated.
+
+
+======================================================================
+
+
+
+ AUTOCLASS C VERSION 3.2.1 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1. autoclass-c/doc/checkpoint-c.text - bring up to date the usage of
+ force_new_search_p in the examples.
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c, globals.h, search-control.c, intf-reports.c,
+ autoclass.c, io-results.c, io-results-bin.c -
+
+ Update "G_ac_version" to 3.2.1, and change type from float to
+ string.
+
+ 2. autoclass-c/prog/autoclass.h, io-read-data.c -
+ Comment out unused functions: DEFINE_DISCRETE_TRANSLATIONS, and
+ PROCESS_DISCRETE_TRANSLATIONS.
+
+ Unused #defines MAXINT and DBG_LL commented out.
+
+
+======================================================================
+
+
+
+ AUTOCLASS C VERSION 3.2 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1. autoclass-c/doc/search-c.text -
+ Added a new section: 14.0 How to get AutoClass C to Produce
+ Repeatable Results.
+
+ Added information about running AutoClass C with more than 1000
+ attributes in sections: 10.0 Do I Have Enough Memory and Disk Space?
+
+ Changed the behavior of search parameter force_new_search_p in
+ order to prevent search trials from being inadvertently lost:
+ if TRUE, will ignore any previous search results, discarding the
+ existing .search & .results[-bin] files after confirmation by the
+ user; if FALSE, will continue the search using the existing
+ .search & .results[-bin] files. The default value of
+ force_new_search_p is now true.
+
+ 2. autoclass-c/doc/interpretation-c.text -
+ Added section headings and a new section entitled: Comparing
+ Influence Report Class Weights And Class/Case Report Assignments
+
+ 3. autoclass-c/doc/preparation-c.text -
+ Added more to section: 1.2.1 SINGLE_NORMAL_CN/CM and
+ MULTI_NORMAL_CN Models
+
+ 4. autoclass-c/doc/reports-c.text -
+ Improved the last paragraph of Generating Sigma Contour Values.
+
+ Replace parameters start_sigma_contours_att and
+ stop_sigma_contours_att with sigma_contours_att_list, to allow
+ non-contiguous groups of attributes to be specified.
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+ Update "G_ac_version" to 3.2.
+
+ 2. autoclass-c/prog/intf-reports.c -
+ In INFLUENCE_VALUES_HEADER, change
+ `fprintf( influence_report_fp, header);' to
+ `fprintf( influence_report_fp, header, "");', and in
+ CLASS_WEIGHTS_AND_STRENGTHS and CLASS_DIVERGENCES add args to
+ output_title fprintf for new page -- this prevents
+ segmentation faults, when the number of attributes exceeds one
+ page, while in report_mode = "text".
+
+ 3. autoclass-c/prog/intf-sigma-contours.c -
+ In COMPUTE_SIGMA_CONTOUR_FOR_2_ATTS, corrected initialization
+ of *rotation. This corrects erroneous values of the contour's
+ rotation.
+
+ 4. autoclass-c/prog/struct-class.c -
+ Correct compiler warning "struct-class.c:239: warning:
+ unused variable `database'".
+
+ 5. autoclass-c/prog/struct-data.c, globals.h, globals.c, search-control.c -
+ In EXPAND_DATABASE, use comp_database->n_data rather than
+ G_s_params_n_data, since G_s_params_n_data does not do the right thing
+ when expand_database is called during report generation (it reads
+ the whole file, not just n_data cases). Remove references to
+ G_s_params_n_data from the 2nd to 4th files.
+
+ 6. autoclass-c/prog/intf-reports.c -
+ In XREF_GET_DATA, allocate more storage for instance class
+ probabilities if there are more than MAX_NUM_XREF_CLASS_PROBS, and
+ only save for printing a maximum of MAX_NUM_XREF_CLASS_PROBS classes.
+
+ IMPORTANT NOTE: This bug fix means that for any previous reports
+ generated by AutoClass C, any data base instance
+ which has five class probability entries in the class cross-reference
+ report, and 1.0 minus the sum of the five probabilities is greater
+ than the largest of them, is in the WRONG CLASS! Re-run the reports
+ with this version!
+
+ 7. autoclass-c/prog/autoclass.c -
+ Print the AutoClass C version when the user invokes AutoClass
+ with no arguments: % autoclass
+
+ 8. autoclass-c/load-ac -
+ Specified define flags for SunOS gcc and Solaris gcc compilations
+ to prevent compiler warnings. Added IRIX 6.4 compatibility.
+
+ 9. autoclass-c/prog/autoclass.h -
+ For gcc under SunOS, include function prototypes for *rand48 functions,
+ to prevent compiler warnings.
+
+ 10. autoclass-c/prog/intf-reports.c -
+ Add descriptive text for each influence value class parameter for
+ reports with parameter report_mode = "text".
+
+ 11. autoclass-c/prog/autoclass.make.solaris.cc -
+ Corrected optimization flag.
+
+ 12. autoclass-c/prog/intf-reports.c -
+ In FORMAT_REAL_ATTRIBUTE, correct correlation matrices print-out
+ for non-contiguous model term attributes, and print matrices only once,
+ after all class attributes are listed.
+
+ 13. autoclass-c/prog/search-control.c -
+ In AUTOCLASS_SEARCH, if force_new_search_p is false, exit if there
+ is no <...>.results[-bin] file. Make TRUE the default for
+ force_new_search_p.
+
+ 14. autoclass-c/prog/intf-reports.c -
+ In PRINT_ATTRIBUTE_HEADER, remove references to INTEGER attribute type.
+
+ 15. autoclass-c/prog/getparams.c -
+ In GETPARAMS, correct logic so that missing "line feed" on last line
+ of the file will be read properly, rather than getting:
+ ERROR: line read exceeds 100 characters: <.....>.
+ In GETPARAMS, correct logic so that an empty integer list (e.g.
+ start_j_list =) may be entered in the .s-params file. This is needed
+ for a restart search situation when it is necessary to peel off as many
+ classes from the start_j_list as were already done by the previous run.
+ If all of the start_j_list was done already, then an empty list is
+ required.
+
+ 16. autoclass-c/prog/io-read-data.c, io-results.c, io-results-bin.c -
+ In READ_DATA, EXPAND_CLSF_WTS, and LOAD_CLASS_DS_S add checks for
+ "out of memory" returns from malloc and realloc.
+
+ 17. autoclass-c/prog/io-results.c -
+ In MAKE_AND_VALIDATE_PATHNAME, VALIDATE_RESULTS_PATHNAME,
+ VALIDATE_DATA_PATHNAME, and GET_CLSF_SEQ change strchr to strrchr
+ to handle `../filename.extension'
+
+ 18. autoclass-c/prog/autoclass.h, predictions.c, search-basic.c, &
+ search-control.c -
+ Notify the user with a warning message and an option to exit from
+ an initial classification run, if the data set size is greater than
+ 1000. The message is "WARNING: the default start_j_list may not
+ find the correct number of classes in your data set!".
+
+ 19. autoclass-c/prog/autoclass.h, autoclass.c, & intf-reports.c -
+ Write -reports option screen output to log file.
+
+ 20. autoclass-c/prog/io-read-data.c -
+ In FIND_DISCRETE_STATS, when the number of discrete value
+ translators is less than attribute definition range, reduce the
+ range and output an advisory, rather than outputting warning
+ message and asking the user whether to proceed or not.
+
+ The above change was REMOVED, since it caused an incompatibility with
+ previous results files: "ERROR: expand_database found unmatched common
+ attributes defs in <.results[-bin] file> and ........
+
+ 21. autoclass-c/prog/globals.h, globals.c, search-control-2.c, & search-control.c -
+ Warn user of search trials which do not converge, which means that
+ their number of try cycles reached the value of the "max_cycles" search
+ parameter. Do this by printing a warning message after the trial completes.
+ Also after the "SUMMARY OF n BEST RESULTS" at the conclusion of each
+ run, print "SUMMARY OF TRY CONVERGENCE" for the n best results.
+
+ 22. autoclass-c/prog/model-multi-normal-cn.c -
+ It was recently brought to our attention that the multi-normal
+ model, with more than about 10 attributes and several thousand
+ instances, would consistently run to the max_duration or
+ max_n_tries limit, regardless of how large those limits were.
+ Suitably instrumented experiments showed that EM (expectation
+ maximization) was actually oscillating. The problem was traced
+ to a conceptual error in the underflow limiting code that
+ constrains the estimation of empirical standard deviations.
+ This has been corrected. However users should be alert for,
+ and report, any further problems of this nature.
+
+ 23. autoclass-c/prog/autoclass.h, intf-reports.c -
+ For MNcn attributes, do not sort them within their model term
+ when order_attributes_by_influence_p = false. The outputting of
+ MNcn correlation matrices after last class attribute, instead of
+ after each term, is now done by a call to
+ GENERATE_MNCN_CORRELATION_MATRICES from
+ AUTOCLASS_CLASS_INFLUENCE_VALUES_REPORT.
+
+ 24. autoclass-c/prog/intf-reports.c, intf-sigma-contours.c -
+ Replace report parameters start_sigma_contours_att and
+ stop_sigma_contours_att with sigma_contours_att_list, to allow
+ non-contiguous groups of attributes to be specified.
+
+ Check for attribute indices of reports parameter
+ sigma_contours_att_list which are declared "ignore" by the .model file.
+ Prevents segmentation fault.
+
+ Correct erroneous rotations for non-covariant pairs of attributes
+ modeled in two different covariant normal terms (the rotations in these
+ cases should be 0.0).
+
+ 25. autoclass-c/prog/intf-reports.c -
+ Previously when specifying report_type = "xref_case" or
+ report_type = "xref_class" along with n_clsfs > 1 or clsf_n_list with
+ more than 1 list element, the .case-text-n or .class-text-n data would
+ be identical. Sometimes segmentation faults would occur. This has
+ been corrected. This was not a problem for report_type = "all"
+ (the default). Also when using the default for report_type ("all"),
+ previously the memory allocated for each classification's cross
+ reference was not deallocated after each classification was processed.
+ It is now properly deallocated.
+
+
+
+======================================================================
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ AUTOCLASS C VERSION 3.1 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1. autoclass-c/data/tests.c -
+ Reconfigure parameter values for the checkpointing test case.
+
+ 2. autoclass-c/data/glass/glassc-chkpt.s-params -
+ Include checkpoint test param settings from tests.c
+
+ 3. autoclass-c/data/autos/* -
+ Add input data files for last **non**-random trial test of
+ autoclass-c/data/tests.c
+
+ 4. autoclass-c/doc/prediction-c.text -
+ Add text concerning handling of "test" cases which are not
+ predicted to be in any of the "training" classes.
+
+ 5. autoclass-c/doc/reports-c.text -
+ Add new reports param: comment_data_headers_p, which prefixes
+ the "#" comment character to all lines except the minimum for
+ selective parsing.
+
+ Add new reports param: max_num_xref_class_probs, which determines
+ how many lesser class probabilities will be printed for the case
+ and class cross-reference reports. The default value is 5.
+
+ Add new report params: start_sigma_contours_att &
+ stop_sigma_contours_att. This adds the capability to compute sigma
+ class contour values for specified pairs of real valued attributes,
+ when generating the influence values report with the data option
+ (report_mode = "data"). See section "Generating Sigma Contour Values".
+
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+ Update "G_ac_version" to 3.1.
+
+ 2. autoclass-c/prog/io-results.c -
+ In VALIDATE_RESULTS_PATHNAME, handle checkpoint files similarly
+ to results files: determine if they are ascii or binary, rather than
+ assuming they are binary. This was only a problem when .s-params
+ parameters save_compact_p = false, and read_compact_p = false.
+
+ In GET_CLSF_SEQ, handle checkpoint files similarly to results files.
+ This fix now allows checkpoint files to be loaded for reconvergence.
+
+ 3. autoclass-c/prog/intf-reports.c, autoclass.h -
+ In XREF_GET_DATA, allocate memory for collector once for each case,
+ rather than n_classes times. This fix now permits reports to be
+ generated for data sets of 100,000 cases and more, without causing
+ a segmentation fault. Eliminate ATTR_ALLOC_INCREMENT, and allocate
+ once for all discrete, and once for all real report attributes, if
+ needed, rather than invoking malloc/realloc for each report attribute.
+
+ 4. autoclass-c/prog/intf-reports.c, autoclass.h -
+ In AUTOCLASS_REPORTS, pass prediction_p to CASE_CLASS_DATA_SHARING,
+ so that XREF_GET_DATA can flag "test" cases which are not predicted
+ to be in any of the "training" classes. Put them in class -1.
+ This is only functional for "autoclass -predict ..." runs. The
+ following message will appear in the screen output for each case that
+ is not a member of any of the "training" classes:
+ xref_get_data: case_num xxx => class 9999
+ Class 9999 members will appear in the "case" and "class" cross-
+ reference reports.
+
+ 5. autoclass-c/prog/intf-influence-values.c -
+ In INFLUENCE_VALUE, do not process attribute values which have
+ null translations. This occurs when the user supplies an excessive range
+ value in .hd2, and ignores the warning to correct it. This prevents
+ a segmentation fault.
+
+ 6. autoclass-c/prog/struct-data.c -
+ In EXPAND_DATABASE, make error msg more informative.
+
+ 7. autoclass-c/prog/autoclass.h, intf-reports.c, intf-extensions.c,
+ search-control-2.c -
+ Implement new reports param "comment_data_headers_p", which prefixes
+ the "#" comment character to all lines except the minimum for selective
+ parsing.
+
+ 8. autoclass-c/prog/io-read-data.c -
+ In OUTPUT_REAL_ATT_STATISTICS, add error check for attribute variance
+ exceeding infinity. This situation is caused by "out-liers" with very large
+ deviations from the other attribute values, and usually means that these
+ attribute values are erroneous. AutoClass C can not proceed in this
+ situation.
+
+ 9. autoclass-c/prog/intf-reports.c -
+ In the influence values report for multi_normal_cn models, when there
+ are more than one covariant normal correlation matrix, print all of them
+ for each class, not just the one for the least significant attribute of
+ the current class. Changes to FORMAT_ATTRIBUTE & FORMAT_REAL_ATTRIBUTE.
+
+ 10. autoclass-c/prog/intf-reports.c -
+ In the case cross-reference report (report_type = "xref_case")
+ generated with the data option (report_mode = "data"), other class
+ probabilities are now printed, if their values are greater than or
+ equal to 0.001, and there are not more than (MAX_NUM_XREF_CLASS_PROBS - 1)
+ of them. Changes to XREF_PAGINATE_BY_CASE, & XREF_OUTPUT_PAGE_HEADERS.
+
+ 11. autoclass-c/prog/intf-reports.c -
+ In the case and class cross-reference reports, the print out of
+ probabilities has increased by one significant digit (0.04 => 0.041);
+ and the minimum value printed is now 0.001, rather than 0.01.
+ The maximum number of lesser probabilities printed out is
+ (MAX_NUM_XREF_CLASS_PROBS - 1). Changes to XREF_PAGINATE_BY_CASE, &
+ XREF_OUTPUT_LINE_BY_CLASS.
+
+ 12. autoclass-c/prog/intf-reports.c -
+ Add new report parameter MAX_NUM_XREF_CLASS_PROBS, which determines
+ how many lesser class probability values will be printed in the
+ case and class cross-reference reports.
+
+ 13. autoclass-c/load-ac, autoclass-c/prog/autoclass.make.*,
+ autoclass-c/prog/autoclass.h, intf-sigma-contours.c, intf-reports.c -
+ Add capability to compute sigma class contour values for
+ specified pairs of real valued attributes, when generating the
+ influence values report with the data option (report_mode = "data").
+ Add new report params start_sigma_contours_att & stop_sigma_contours_att.
+
+
+
+======================================================================
+
+
+
+
+
+
+
+
+
+
+ AUTOCLASS C VERSION 3.0 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1. autoclass-c/doc/search-c.text, reports-c.text --
+ New parameter for .s-params & .r-params files: free_storage_p.
+ The default value tells AutoClass to free the majority of its
+ allocated storage. If specified as false, AutoClass will not
+ attempt to free storage.
+
+ 2. autoclass-c/doc/preparation-c.text -
+ Correct typos "looses" and "scaler".
+
+ 3. autoclass-c/doc/reports-c.text --
+ New parameter for .r-params files: report_mode. It specifies
+ the mode of the reports to generate. The default, "text", is
+ the current formatted text layout. The new "data" option has a
+ parsable numerical layout -- suitable for further processing.
+
+ 4. autoclass-c/sample/read.me.c, scriptc.text, imports-85c.influ-o-data-1,
+ imports-85c.case-data-1, imports-85c.class-data-1
+ Updated the sample classification for report_mode = "data"
+ reports.
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+ Update "G_ac_version" to 3.0.
+
+ 2. autoclass-c/prog/autoclass.h, io-results.c, io-results-bin.c, struct-class.c,
+ struct-clsf.c, struct-model.c
+ Correct improper pointer casting:
+ fprintf(stdout, "free_model(%d): %d\n", i_model, (int) model);
+ to
+ fprintf(stdout, "free_model(%d): %p\n", i_model, (void *) model);
+ which generates compiler warnings on 64-bit architectures.
+
+ Change prototype for list_class_storage & list_clsf_storage from
+ int * to void **.
+
+ 3. autoclass-c/prog/search-control.c, intf-reports.c -
+ Process new params option: free_storage_p.
+
+ 4. autoclass-c/prog/search-control-2.c -
+ Correct formatted message typos "print print" and "estiamte" in
+ PRINT_INITIAL_REPORT.
+
+ 5. autoclass-c/prog/intf-reports.c -
+ In PRE_FORMAT_ATTRIBUTES, check for num_terms > 0 prior to calling
+ SORT_MNCN_ATTRIBUTES.
+
+ 6. autoclass-c/prog/io-read-data.c -
+ In READ_LINE, only return FALSE if no chars have been read -- allows
+ last line with no new-line to be read correctly.
+
+ 7. autoclass-c/prog/getparams.c -
+ Correct GETPARAMS for INT_LIST: to allow "= 84, 92 " to be read
+ as 84 & 92, rather than 84 & 84. Also allows "n_clsfs = 2 " to be
+ read properly.
+
+ 8. autoclass-c/prog/autoclass.h, intf-reports.c -
+ Implement "report_mode" parameter.
+
+ 9. autoclass-c/prog/io-read-data.c -
+ In PROCESS_ATTRIBUTE_DEF, check for incomplete discrete and
+ real attribute definitions.
+
+ 10. autoclass-c/load-ac -
+ Use "/bin/uname -s" to determine if host is running IRIX (SGI).
+
+ 11. autoclass-c/prog/struct-class.c -
+ In FREE_TPARM_DS, allow tparm->tppt to be UNKNOWN or IGNORE.
+ If not matched, print advisory msg, not error msg. Do not abort.
+
+ 12. autoclass-c/prog/autoclass.h, search-basic.c, model-expander-3.c,
+ struct-class.c, struct-clsf.c, predictions.c, & search-control-2.c
+ When creating the weights for a new class, use database->n_data
+ for the appropriate data base, rather than model->database->n_data.
+ In the "prediction" mode, this correctly builds the test database
+ class weights using the size of the test database, rather than
+ that of the training database -- which is pointed to by the model.
+ Functions modified: SET_UP_CLSF, GET_CLASS, CLASS_MERGED_MARGINAL_FN,
+ COPY_CLASS_DS, ADJUST_CLSF_DS_CLASSES, COPY_CLSF_DS, POP_CLASS_DS,
+ BUILD_CLASS_DS, COPY_TO_CLASS_DS, AUTOCLASS_PREDICT, &
+ PRINT_SEARCH_TRY. This corrects a segmentation fault which occurred
+ during storage deallocation of prediction runs.
+
+======================================================================
+
+
+
+
+
+ AUTOCLASS C VERSION 2.9 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1. autoclass-c/doc/search-c.text, reports-c.text -
+ Added new parameter for both ".s-params" & ".r-params"
+ files: break_on_warnings_p. The default value asks the user
+ whether to continue or not when data definition warnings are found.
+ If specified as false, then AutoClass will continue, despite
+ warnings -- the warning will continue to be output to the terminal
+ and, in the case of the "-search" option, to the log file.
+
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+ Update "G_ac_version" to 2.9.
+
+ 2. autoclass-c/prog/autoclass.make.solaris.cc -
+ Correct compiler options for SC4.1 cc compiler
+
+ 3. autoclass-c/prog/intf-reports.c -
+ In FORMAT_DISCRETE_ATTRIBUTE, correct bad test for
+ warn_errs->single_valued_warning. This prevented "discrete"
+ type attributes from being displayed in the influence values
+ report, because a segmentation fault occurred. This
+ problem was introduced in version 2.8.
+
+ 4. autoclass-c/prog/autoclass.h -
+ In STRUCT I_REAL, type "last_sorted_term_n_att" as int,
+ not float.
+
+ 5. autoclass-c/prog/intf-reports.c -
+ In SORT_MNCN_ATTRIBUTES, type "last_sorted_term_n_att"
+ as int, not float. In PRE_FORMAT_ATTRIBUTES, revise logic for
+ computing/printing the correlation matrix. Items 4 & 5
+ correct the problem that the correlation matrix for attribute
+ type multi_normal_cn is only printed if it is last in
+ the sorted list of attributes.
+
+ 6. autoclass-c/prog/struct-data.c -
+ In EXPAND_DATABASE, make cosmetic change to an error message.
+
+ 7. autoclass-c/prog/intf-reports.c -
+ In XREF_GET_DATA, validity check that .r-params file values
+ of xref_class_report_att_list are in the range 0 - (number of
+ attributes - 1). Prevents segmentation fault.
+ Also in XREF_GET_DATA, correct syntax and location of
+ memory free command. Prevents segmentation fault when processing
+ discrete type data. This problem was introduced in version 2.8.
+
+ 8. autoclass-c/prog/getparams.h -
+ Increased MAXPARAMS to 40.
+
+ 9. autoclass-c/prog/search-control.c, intf-reports.c -
+ Added code to parse the new parameter: break_on_warnings_p.
+
+ 10. autoclass-c/prog/autoclass.h, intf-reports.c -
+ Compute last_clsf_p and pass to XREF_GET_DATA, so that freeing
+ of data will not be done until all clsfs have been processed.
+ To take advantage of the memory reduction, only process one clsf
+ at a time -- this applies only to very large data sets.
+
+
+
+======================================================================
+
+
+
+
+
+ AUTOCLASS C VERSION 2.8 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1. autoclass-c/doc/search-c.text -
+ Add new search parameter "read_compact_p", which directs
+ AutoClass to read the "results" and "checkpoint" files in
+ either binary format -- ".results-bin"/".chkpt-bin"
+ (read_compact_p = true); or ascii format -- ".results"/".chkpt"
+ (read_compact_p = false). The default is read_compact_p = true.
+
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+ Update "G_ac_version" to 2.8.
+
+ 2. autoclass-c/prog/io-results.c -
+ In "validate_data_pathname", prefer the user supplied file
+ extension, and only attempt to open ".db2", and then
+ ".db2-bin", if no extension (/name.) or an invalid extension
+ is supplied. Check for presence of '.' in pathname.
+
+ In "validate_results_pathname" prefer the user supplied file
+ extension, and only attempt to open ".results-bin", and then
+ ".results", if no extension (/name.) or an invalid extension
+ is supplied. Check for presence of '.' in pathname.
+
+ In "make_and_validate_pathname" check for presence of '.' in
+ pathname.
+
+ In "get_clsf_seq" simplify the test for "ascii" or "binary"
+ results file format -- also more portable.
+
+ 3. autoclass-c/prog/search-control.c -
+ In "autoclass_search" use make_and_validate_pathname and
+ search parameter "save_compact_p" to determine file extension
+ of "results" file prior to calling validate_results_pathname.
+
+ Add "read_compact_p" search parameter for use in reading
+ "results" and "checkpoint" files.
+
+ Make short search trial printout more portable.
+
+ 4. autoclass-c/load-ac; autoclass-c/prog/autoclass.make.*
+ Define make files with -I and -L parameters for SunOS 4.1.3 and
+ change naming convention: .sun. => .sunos. or .solaris.
+ Specifically the files are now --
+ autoclass.make.solaris.cc, autoclass.make.solaris.gcc,
+ autoclass.make.sunos.acc, and autoclass.make.sunos.gcc
+
+ 5. autoclass-c/prog/io-read-data.c, autoclass.h -
+ In "translate_discrete", allocate space for translations
+ using (strlen( value) + 1), rather than sizeof(shortstr) --
+ prevents corruption of discrete data translation tables when
+ translations are longer than (SHORT_STRING_LENGTH - 1) = 40
+ characters. In "get_line_tokens" and "read_from_string", add
+ length checking for "form"; make it and length check for
+ "datum_string" explicit. Increase output string length in
+ "output_created_translations".
+
+ 6. autoclass-c/prog/io-read-data.c, autoclass.h -
+ Increase from 3000 to 20000 the value of VERY_LONG_STRING_LENGTH
+ to handle very large datum lines.
+
+ 7. autoclass-c/prog/io-results.c -
+ In VALIDATE_RESULTS_PATHNAME and VALIDATE_DATA_PATHNAME,
+ use binary_file, rather than file, where it is intended.
+
+ 8. autoclass-c/prog/intf-reports.c, io-read-data.c, autoclass.h -
+ Increase DATA_ALLOC_INCREMENT from 100 to 1000 for reading very
+ large datasets. Add DATA_ALLOC_INCREMENT logic of READ_DATA
+ to XREF_GET_DATA. This will prevent segmentation faults
+ encountered when reading very large .db2 files into the
+ reports processing function of AutoClass.
+
+ 9. autoclass-c/prog/autoclass.make.solaris.cc, autoclass.make.solaris.gcc,
+ autoclass.make.sunos.acc, and autoclass.make.sunos.gcc -
+ Comment out "depend: $(SRCS)", so that not all source files are
+ recompiled when only one file changes.
+
+ 10. autoclass-c/prog/intf-reports.c -
+ In FORMAT_DISCRETE_ATTRIBUTE, do not process attributes with
+ warning or error messages -- this prevents segmentation faults.
+
+ In XREF_GET_DATA, free database allocated memory after it is
+ transferred into report data structures. This reduces the
+ amount of memory required when generating reports for very
+ large data bases, and prevents running out of memory.
+
+ In all functions calling malloc/realloc for dynamic memory
+ allocation, checks have been added to notify the user if
+ memory is exhausted.
+
+ 11. autoclass-c/load-ac & autoclass-c/prog/autoclass.make.hp.cc -
+ Port the "make" file for HP-UX operating system using the
+ bundled "cc" compiler.
+
+
+======================================================================
+
+
+
+
+
+ AUTOCLASS C VERSION 2.7 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1. autoclass-c/doc/search-c.text -
+ Add documentation for search parameter "interactive_p". This
+ will allow AutoClass to be run as a background task, since it
+ will not be querying standard input for the "quit" character.
+
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+ Update "G_ac_version" to 2.7. Add "G_interactive_p".
+
+ 2. autoclass-c/prog/globals.h -
+ Add "G_interactive_p".
+
+ 3. autoclass-c/prog/utils.c -
+ In "char_input_test", test for "G_interactive_p" -- if false,
+ do not do the test.
+
+ 4. autoclass-c/prog/search-control.c -
+ In "autoclass_search", process "interactive_p" from the
+ search parameters file, and output advisory message if set to
+ false.
+
+ 5. autoclass-c/prog/search-control-2.c -
+ In "print_initial_report", notify user that "typing q to quit"
+ is not functional when "interactive_p" = false.
+
+======================================================================
+
+
+
+
+
+ AUTOCLASS C VERSION 2.6 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1.
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+ Update "G_ac_version" to 2.6.
+
+ 2. autoclass-c/prog/model-transforms.c -
+ In "generate_singleton_transform", correct segmentation fault
+ which occurs when more than 50 type = real, subtype = scalar
+ attributes are defined in the ".hd2" & ".model" files.
+
+ In "log_transform", use "safe_log" to transform values --
+ prevent "log: SING error" error messages.
+
+ 3. autoclass-c/prog/model-expander-3.c -
+ In "check_term", since att_info can be realloc'ed
+ for transformed attributes, reset data_base->att_info each
+ time thru the loop.
+
+ 5. autoclass-c/prog/utils-math.c -
+ Add "safe_log".
+
+ 6. autoclass-c/prog/autoclass.h -
+ Add function prototype for "safe_log".
+
+ 7. autoclass-c/prog/model-multi-normal-cn.c -
+ In "multi_normal_cn_model_term_builder" change log calls to
+ safe_log to prevent "log: SING error" error messages.
+
+ 8. autoclass-c/prog/model-single-normal-cm.c -
+ In "build_sn_cm_priors" and "single_normal_cm_model_term_builder"
+ change log calls to safe_log to prevent "log: SING error" error
+ messages.
+
+ 9. autoclass-c/prog/model-single-normal-cn.c -
+ In "build_sn_cn_priors" and "single_normal_cn_model_term_builder"
+ change log calls to safe_log to prevent "log: SING error" error
+ messages.
+
+ 10. autoclass-c/prog/search-control.c -
+ In "autoclass_search" test for user overriding of search parameters
+ randomize_random_p and/or start_fn_type. If done, ask for
+ confirmation to proceed.
+
+
+======================================================================
+
+
+
+
+
+ AUTOCLASS C VERSION 2.5 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1. autoclass-c/doc/reports-c.text -
+ Minor typographical changes. Added new report generation
+ parameter: order_attributes_by_influence_p. Its default
+ value is true. The file extension of the influence values
+ report has been changed from ".influ-text-1" to ".influ-o-text-1"
+ when order_attributes_by_influence_p = true, and to
+ ".influ-no-text-1" when order_attributes_by_influence_p = false.
+
+ 2. autoclass-c/doc/interpretation-c.text -
+ Minor changes to the text.
+
+ 3. autoclass-c/sample/imports-85c.influ-o-text-1
+ Influence values report has been significantly revised and
+ reformatted.
+
+ 4. autoclass-c/doc/search-c.text -
+ Corrected definition of fixed_j.
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/globals.c -
+ Update "G_ac_version" to 2.5.
+
+ 2. autoclass-c/prog/intf-reports.c, utils.c -
+ Formatting change to "format_real_attribute" for multiple
+ multivariate attribute groups.
+
+ Remove covariance matrix output and reformat the correlation
+ matrix output to fixed decimal point notation.
+
+ For the influence values report, sort real valued attributes
+ of the same model group by the first significance value, if
+ that group is multi_normal_cn.
+
+ For discrete attributes: relabel the headers "Prob", rather than
+ "Mean"; and correct the instance value significance computation
+ to be "local_prob * log( local_prob / global_prob)".
+
+ 3. autoclass-c/prog/autoclass.h -
+ Add #ifndef for MAXPATHLEN.
+
+ 4. autoclass-c/prog/io-results.c -
+ In "validate_data_pathname", "validate_results_pathname", &
+ "make_and_validate_pathname", only do fclose, if fopen returns
+ non-NULL.
+
+ 5. autoclass-c/prog/search-control-2.c -
+ Add "pad" argument to "print_search_try".
+
+ 6. autoclass-c/prog/intf-extensions.c -
+ Formatting change to "get_models_source_info".
+
+ 7. autoclass-c/load-ac,
+ autoclass-c/prog/autoclass.make.sun.gcc, autoclass.make.sun.acc,
+ autoclass.make.sun.cc, autoclass.make.sgi -
+ (remove autoclass.make.sun)
+ Add SunOS/Solaris C compiler support.
+
+ 8. autoclass-c/prog/io-results.c, io-read-model.c, io-read-data.c,
+ utils.c, intf-reports.c, getparams.c -
+ Cast return values of "strlen" to int.
+
+ 9. autoclass-c/prog/model-transforms.c -
+ In "generate_singleton_transform", correct segmentation fault
+ which occurs when more than 25 type = real, subtype = scalar
+ attributes are defined in the ".hd2" & ".model" files.
+
+ 10. autoclass-c/prog/struct-data.c, io-results.c, io-results-bin.c -
+ Properly initialize att_info array when it exceeds preallocated
+ size.
+
+ 11. autoclass-c/load-ac,
+ autoclass-c/prog/autoclass.make.linux.gcc, autoclass.make.sun.*,
+ autoclass.make.sgi, fcntlcom-ac.h -
+ Thanks to Andrew Lewycky <plewycky@oise.on.ca>, added mods for
+ port to Linux version 1.2.10, GCC version 2.5.8, libc version
+ 4.6.25.
+
+ 12. autoclass-c/prog/model-single-multinomial.c -
+ In "sm_params_influence_fn", add check for out-of-bounds
+ arguments to the log function to prevent "log domain" errors.
+
+
+======================================================================
+
+
+
+
+
+ AUTOCLASS C VERSION 2.0 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1. autoclass-c/doc/search-c.text -
+ Added new ".s-params" parameter screen_output_p, whose default
+ value is true. If false, no output is directed to the screen.
+ Assuming log_file_p = true, output will be directed to the
+ log file only.
+
+ 2. autoclass-c/doc/introduction-c.text, & prediction-c.text -
+ Added "prediction-c.text" to document the prediction mode of
+ AutoClass C, which uses a "training" classification to predict
+ probabilistic class membership for the cases of a "test" data
+ file.
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/io-results.c -
+ In "read_class_DS_s", add debugging info to use with
+ G_clsf_storage_log_p.
+
+ 2. autoclass-c/prog/struct-class.c -
+ In "build_class_DS", add debugging info to use with
+ G_clsf_storage_log_p.
+
+ 3. autoclass-c/prog/io-results-bin.c -
+ In "load_class_DS_s," add debugging info to use with
+ G_clsf_storage_log_p.
+
+ 4. autoclass-c/prog/struct-data.c -
+ In "expand_database", to handle partial databases, read
+ G_s_params_n_data.
+
+ 5. autoclass-c/prog/globals.c, globals.h, search-control.c -
+ Add G_s_params_n_data. Change G_ac_version to 2.0 in
+ globals.c.
+
+ 6. autoclass-c/prog/io-read-data.c, autoclass.h -
+ In "read_data" test on n_data was off by 1.
+
+ In "output_created_translations" add discrete value occurrence
+ count.
+
+ In "read_data" move "output_created_translations" call to
+ "output_att_statistics".
+
+ Add "output_att_statistics" & "output_real_att_statistics".
+
+ In "create_warn_err_ds", move malloc out of declaration.
+
+ 7. autoclass-c/prog/prints.c, autoclass.h -
+ Add "sum_vector_f" for debugging.
+
+ 8. autoclass-c/prog/autoclass.c -
+ Make "main" arg list conform to ANSI C.
+
+ 9. autoclass-c/prog/model-transforms.c -
+ In "generate_singleton_transform", call "output_real_att_statistics"
+ for each transformed attribute.
+
+ 10. autoclass-c/prog/utils.c -
+ In "randomize_list" do limit check on list index.
+
+ 11. autoclass-c/prog/search-control-2.c -
+ In all convergence functions, allocate mallocs in body of function,
+ rather than in local variable declarations.
+
+ In "get_search_DS", move malloc out of declaration.
+
+ 12. autoclass-c/prog/intf-reports.c -
+ In "xref_get_data", use n_real_att - 1, rather than i, for index to
+ real_attribute_data; and n_discrete_att - 1 for discrete_attribute_data.
+ Corrects garbage output when .r-params parameter
+ "xref_class_report_att_list" contains mixed real and discrete attributes.
+
+ In "xref_class_report_attributes", use %g, rather than %f for real data.
+
+ In "xref_output_line_by_class", handle unknown real values.
+
+ 13. autoclass-c/prog/io-read-data.c, io-results.c, io-results-bin.c,
+ fcntlcom-ac.h -
+ Convert binary i/o from non-standard (open/close/read/write) to
+ ANSI (fopen/fclose/fread/fwrite).
+
+ 14. autoclass-c/prog/search-control.c, search-basic.c, search-control-2.c,
+ utils.c, globals.c, globals.h, init.c -
+ Convert from srand/rand to srand48/lrand48 for random number
+ generation.
+
+ 15. autoclass-c/prog/predictions.c -
+ Add this file to implement the "autoclass -predict ..." capability,
+ which allows cases in a "test" data set to be applied to a
+ "training" data set and have their class membership predicted.
+ Use "prediction_p" and global "G_training_clsf" in "io-read-data.c"
+ to force the "test" database to use the same discrete translations
+ as the "training" database.
+
+ 16. autoclass-c/load-ac; autoclass-c/prog/autoclass.c, autoclass.make,
+ io-results.c, & autoclass.h -
+ Changes to support item 15.
+
+ 17. autoclass-c/prog/struct-data.c, struct-clsf.c, & struct-model.c -
+ In "att_ds_equal_p", check for type = dummy. Remove
+ "db_DS_same_source_p" and use "db_same_source_p", instead.
+
+ 18. autoclass-c/prog/search-control.c -
+ Make FILE * type local variables static, since they are passed to other
+ functions.
+
+ 19. autoclass-c/prog/autoclass.make -
+ Compile code with "-g", rather than "-ggdb" option.
+
+ 20. autoclass-c/load-ac & autoclass-c/prog/autoclass.make.sun,
+ autoclass-c/prog/autoclass.make.sgi -
+ Changes to support SGI IRIX version 5.2 with "cc" compiler.
+
+ 21. autoclass-c/prog/io-read-data.c -
+ In "output_warning_msgs", replaced sizeof(msg) with msg_length
+ in first safe_sprintf call to prevent:
+ "ERROR: vsprintf produced 80 chars (max number is 3) -- called
+ by output_warning_msgs
+ Program received signal SIGABRT, Aborted."
+
+======================================================================
+
+
+
+
+
+ AUTOCLASS C VERSION 1.5 NOTES
+======================================================================
+======================================================================
+
+Documentation:
+------------------------------
+
+ 1. autoclass/doc/introduction-c.text, kdd-95.ps, tr-fia-90-12-7-01.ps -
+ Postscript papers are now included as postscript, instead of
+ uuencoded postscript.
+
+ 2. autoclass/doc/preparation-c.text -
+ Added binary data file input option.
+
+
+Programming:
+------------------------------
+
+ 1. autoclass-c/prog/autoclass.c -
+ In "main", call "validate_data_pathname" to allow either .db2 ("ascii")
+ or .db2-bin ("binary") data file extensions. The identifying header of
+ a .db2-bin file is
+ - ".db2-bin" - char[8]
+ - 32-bit integer with byte-length of each data case.
+ The data cases follow in binary "float" format -- 32 bit fields.
+
+ 2. autoclass-c/prog/io-results.c -
+ Add "validate_data_pathname".
+
+ 3. autoclass-c/prog/autoclass.h -
+ Function prototype definition change/addition. Add DATA_BINARY_FILE_TYPE.
+ Change character array variables of length MAX_PATHNAME_LENGTH (81) to
+ variables of type fxlstr (length 160) to handle very long file
+ pathnames. #define M_PI if not defined -- needed under Solaris. Use
+ pow rather than exp2, since exp2 not available under Solaris gcc 2.6.3.
+
+ 4. autoclass-c/prog/io-read-data.c -
+ In "read_database" change NULL to FALSE, so that int/int rather than
+ int/ptr comparison is made. Detected by Solaris GNU gcc.
+
+ "read_database", "read_data" and "read_database_doit" modified to handle
+ binary data files.
+
+ 5. autoclass-c/prog/globals.h, globals.c -
+ Add G_data_file_format.
+
+ 6. autoclass-c/prog/search-control.c -
+ In "autoclass_search" do not open/close ".db2" data file.
+
+ Check for non-NULL "best_clsfs" prior to writing ".results[-bin]"
+ file.
+
+ 7. autoclass-c/prog/struct-data.c -
+ In "expand_database", call "validate_data_pathname" to allow either
+ .db2 ("ascii") or .db2-bin ("binary") data file extensions.
+
+ 8. autoclass-c/prog/search-basic.c -
+ Modified "generate_clsf"'s call to "read_database".
+
+ 9. autoclass-c/prog/utils.c, io-read-data.c, io-results-bin.c & io-results.c -
+ Since the include file <sys/fcntlcom.h> is not available in the Solaris
+ GNU gcc implementation, hard code them in "fcntlcom-ac.h". Solaris 2.4
+ fails open, unless fopen/fclose is done first.
+
+ 10. autoclass-c/load-ac -
+ Add "fcntlcom-ac.h". Use "clean" make target.
+
+ 11. autoclass-c/prog/search-control-2.c -
+ In "print_report", do not use NULL as value of delta_ln_p.
+
+ In "print_final_report", corrected the overwriting of a string array
+ in cases where long pathnames are used.
+
+ 12. autoclass-c/prog/utils.c, intf-reports.c, search-control.c, & getparams.c -
+ Correct compiler warnings found by Solaris gcc version 2.6.3.
+
+ 13. autoclass-c/prog/init.c -
+ In "init", use getcwd, rather than getwd for Solaris compatibility.
+
+ 14. autoclass-c/prog/autoclass.make -
+ Include "clean" target. Add compiler options "-pedantic -Wall".
+
+ 15. autoclass-c/prog/utils.c -
+ Add "safe_sprintf", and use it in other programs in lieu of "sprintf"
+ to detect string overwrites. Corrected string overwrite which caused
+ abort and the message "Premature end of file reading symbol table".
+
+ 16. autoclass-c/prog/intf-reports.c -
+ In "search_summary" change search->n to search->n_tries to prevent
+ segment violation when there are duplicates.
+
+======================================================================
+
+
+
--- autoclass-3.3.6.orig/prog/utils.c
+++ autoclass-3.3.6/prog/utils.c
@@ -17,7 +17,7 @@
#include "autoclass.h"
#include "minmax.h"
#include "globals.h"
-#include "fcntlcom-ac.h"
+#include <fcntl.h>
/* SUPRESS CODECENTER WARNING MESSSAGES */
@@ -564,12 +564,12 @@
return (FALSE);
fcntl_flags = fcntl( stdin_fd, F_GETFL );
- fcntl_flags |= O_NDELAY;
+ fcntl_flags |= O_NONBLOCK;
fcntl( stdin_fd, F_SETFL, fcntl_flags );
c = getc(stdin);
- fcntl_flags &= ~O_NDELAY;
+ fcntl_flags &= ~O_NONBLOCK;
fcntl( stdin_fd, F_SETFL, fcntl_flags );
if (c == '~')
--- autoclass-3.3.6.orig/prog/intf-reports.c
+++ autoclass-3.3.6/prog/intf-reports.c
@@ -1594,7 +1594,8 @@
fprintf( influence_report_fp, "\f\n\nCLASS LISTINGS:\n\n"
" These listings are ordered by class weight --\n"
" * j is the zero-based class index,\n"
- " * k is the zero-based attribute index, and\n"
+ " * k is the zero-based attribute index, and\n");
+ fprintf( influence_report_fp,
" * l is the zero-based discrete attribute instance index.\n\n"
" Within each class, the covariant and independent model terms are ordered\n"
" by their term influence value I-jk.\n\n");
@@ -1602,7 +1603,8 @@
" Covariant attributes and discrete attribute instance values are both\n"
" ordered by their significance value. Significance values are computed\n"
" with respect to a single class classification, using the divergence from\n"
- " it, abs( log( Prob-jkl / Prob-*kl)), for discrete attributes and the\n"
+ " it, abs( log( Prob-jkl / Prob-*kl)), for discrete attributes and the\n");
+ fprintf( influence_report_fp,
" relative separation from it, abs( Mean-jk - Mean-*k) / StDev-jk, for\n"
" numerical valued attributes. For the SNcm model, the value line is\n"
" followed by the probabilities that the value is known, for that class\n");
--- autoclass-3.3.6.orig/prog/io-results-bin.c
+++ autoclass-3.3.6/prog/io-results-bin.c
@@ -595,6 +595,9 @@
((expand_list[0] != END_OF_INT_LIST) &&
(member_int_list( clsf_index+1, expand_list) == TRUE)))) {
expand_clsf( clsf, want_wts_p, update_wts_p);
+ if(first_clsf && clsf && (first_clsf->models != clsf->models)) {
+ first_clsf->models = clsf->models;
+ }
/* fprintf( stderr, "clsf index %d expanded\n", clsf_index); */
}
--- autoclass-3.3.6.orig/prog/Makefile
+++ autoclass-3.3.6/prog/Makefile
@@ -0,0 +1,53 @@
+### AUTOCLASS C MAKE FILE FOR Linux version 1.2.10, GCC version 2.5.8,
+### and libc version 4.6.25.
+
+### WHEN ADDING FILES HERE, ALSO ADD THEM TO LOAD-AC ###
+
+## THE FIRST CHARACTER OF EACH commandList must be tab
+# targetList: dependencyList
+# commandList
+## evaluate (setq-default indent-tabs-mode t)
+
+# optimize & debug - stay with IEEE compliance
+# OSFLAGS, CPPFLAGS and LDFLAGS are not set here; they expand to nothing
+# unless supplied by the environment or on the make command line.
+CFLAGS = $(OSFLAGS) -ansi -pedantic -Wall -O2 -fno-fast-math -g
+
+CC = gcc
+
+DEPEND =
+
+SRCS = globals.c init.c io-read-data.c io-read-model.c io-results.c \
+	io-results-bin.c model-expander-3.c matrix-utilities.c \
+	model-single-multinomial.c model-single-normal-cm.c \
+	model-single-normal-cn.c model-multi-normal-cn.c \
+	model-transforms.c model-update.c search-basic.c \
+	search-control.c search-control-2.c \
+	search-converge.c struct-class.c struct-clsf.c \
+	statistics.c predictions.c \
+	struct-data.c struct-matrix.c struct-model.c \
+	utils.c utils-math.c \
+	intf-reports.c intf-extensions.c intf-influence-values.c \
+	intf-sigma-contours.c \
+	prints.c getparams.c autoclass.c
+
+OBJS = $(SRCS:.c=.o)
+
+# depend and clean name actions, not files, so never treat them as up to date
+.PHONY: depend clean
+
+# First rule, hence the default goal: link all objects into the executable.
+autoclass: $(OBJS)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) -lm -lc
+
+# Compile each C source into its object file.
+%.o : %.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@
+
+depend: $(SRCS)
+
+# Remove the executable and the object files this makefile builds.
+clean:
+	$(RM) autoclass $(OBJS)
+
+# IF YOU PUT ANYTHING HERE IT WILL GO AWAY