sid.cpp

This example shows how to use the SSpeakerID2I interface.

00001 /*****************************************************************
00002  *  BSAPI Speaker Identification Example                         *
00003  *                                                               *
00004  *  Author      : Petr Schwarz, Pavel Matejka, Tomas Cipr        *
00005  *  Copyright   : (C) 2006-2011 by Phonexia s.r.o                *
00006  *                                                               *
00007  *  For more info, please contact us at support@phonexia.com     *
00008  *****************************************************************/
00009 
00010 #include <stdio.h>
00011 #include <stdlib.h>
00012 #include <stdarg.h>
00013 #include <string.h>
00014 #include <assert.h>
00015 #include <errno.h>
00016 #include <math.h>
00017 #include <new>
00018 
00019 #include "bsapi.h"
00020 #include "getopt.h"
00021 
00022 #ifdef WIN32   // path separator used when composing the default configuration file path in main()
00023   #define DIRSEP "\\"
00024 #else
00025   #define DIRSEP "/"
00026 #endif
00027 
00028 // compile time program options
00029 #define SID_MIN_LEN_TO_PROCESS    10.0f       // 10 seconds; the Dump* functions flag records whose speech length is below this
00030 #define SID_BELOW_MIN_LEN_TEXT    "(too short)"   // text appended to an output line of a record flagged as too short
00031 #define SID_OUT_COLUMN_CHARS      "snlr"          // characters accepted in the -f column string (checked with strspn in main)
00032 
00033 // default option values (used by main() when the matching command line option is not given)
00034 #define SID_DEF_WAVE_FMT          "alaw"          // -w
00035 #define SID_DEF_WAVE_EXT          "alw"           // -e
00036 #define SID_DEF_NCHANNELS         1               // -n
00037 #define SID_DEF_SCORE_SHARPNESS   1.0f            // -z
00038 
00039 // This is an error handling object. If an error occurs, a message is sent to this object at first. 
00040 // Then the running function will exit with the false or 0 return value. The object also accepts 
00041 // warning and logging messages.
00042 class ErrorHandler : public SErrorCallbackI 
00043 {
00044   public:
00045     ErrorHandler() : mVerbose(false) {;}
00046     virtual void BSAPI_METHOD OnTextMessage(SUnknownI *pSender, message_type type, unsigned int messageId, const char *pMessage)
00047     {
00048       unsigned int iid = pSender ? pSender->GetIID() : SIID_UNDEFINED;
00049       switch(type)
00050       {
00051         case mtError:
00052           fprintf(stderr, "ERROR: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00053           break;
00054         case mtWarning:
00055           fprintf(stderr, "WARNING: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00056           break;
00057         case mtLog:
00058           LogMessage(pMessage);
00059           break;
00060       }
00061     }
00062     
00063     void LogMessage(const char *pMessage, ...)
00064     {
00065       if (mVerbose)
00066       {
00067         va_list ap;
00068         va_start(ap, pMessage);
00069         vfprintf(stderr, pMessage, ap);
00070         fprintf(stderr, "\n");
00071         va_end(ap);
00072       }
00073     }
00074     
00075     void SetVerbose(bool verbose) {mVerbose = verbose;}
00076   
00077   protected:
00078     bool mVerbose;
00079 } gErrorHandler;
00080 
00081 void help()
00082 { 
00083   puts("\n Speaker identification                                         ");
00084   printf(" %s\n", BSAPIVersion());
00085   puts(" ================================================================ ");
00086   puts("                                                                  ");
00087   puts(" USAGE: sid [options]                                             ");
00088   puts("                                                                  ");
00089   puts(" system configuration:                                            ");
00090   puts("   -c file          configuration file                            ");
00091   puts("   -m dir           model directory                               ");
00092   puts("   -a str1,str2...  active speaker models                         ");
00093   puts("   -v               verbose mode                                  ");
00094   puts("                                                                  ");
00095   puts(" input:                                                           ");
00096   puts("   -i file          input file                                    ");
00097   puts("   -l file          list of input files                           ");
00098   puts("   -d dir           input directory                               ");
00099   puts("   -e str [alw]     extension of audio files                      ");
00100   puts("   -w fmt [alaw]    waveform format (lin16, lin8, alaw, mulaw)    ");
00101   puts("   -n num [1]       number of channels in audio files             ");
00102   puts("   -p start,len     active waveform part (in seconds)             ");
00103   puts("   -k file          use calibration file                          ");
00104   puts("                                                                  ");
00105   puts(" training:                                                        ");
00106   puts("   -t               enable training                               ");
00107   puts("   -g str           name of the speaker to be trained             ");
00108   puts("                                                                  ");
00109   puts(" calibration estimation:                                          ");
00110   puts("   -y file          make calibration file                         ");
00111   puts("                                                                  ");
00112   puts(" output:                                                          ");
00113   puts("   -s file          output score file                             ");
00114   puts("   -u               suppress the 'too short' output               ");
00115   puts("   -r               produce scores for all speakers               "); 
00116   puts("   -f columns       enable column output format (see columns)     ");
00117   puts("   -z num [1.0]     score sharpness (positive number)             ");
00118   puts("                                                                  ");
00119   puts(" columns:           (columns to print are specified by string of  ");
00120   puts("                     the characters below, e.g. lsn)              ");
00121   puts("    s               raw score                                     ");
00122   puts("    n               score normalized to <0, 100>                  ");
00123   puts("    l               speech length                                 ");
00124   puts("    r               record length                                 ");
00125   puts("                                                                  "); 
00126 }
00127 
// Maps a raw score to the interval <0, 100> using a logistic curve.
// 'scale' multiplies the score before the mapping; a raw score of 0 always gives 50.
inline float RescaleScore(float score, float scale = 1.0f)
{
  const float exponent    = -(scale * score);
  const float denominator = 1.0f + expf(exponent);
  return 100.0f / denominator;
}
00132 
00133 // The line format is: input_file_name speaker [optional columns]
00134 void DumpColWise(const char *pInputName, char **ppNames, float *pScores, int nSpeakers, 
00135   float recordLength, float speechLength, float sharpness, const char *pColumnFmt, FILE *pFileHandle, bool suppressTooShort)
00136 {
00137   for (int i = 0; i < nSpeakers; i++)
00138   {
00139     fprintf(pFileHandle, "%s %s", pInputName, ppNames[i]);
00140     for (const char *pc = pColumnFmt; *pc != '\0'; pc++)
00141     {
00142       switch (*pc)
00143       {
00144         case 's':
00145           if (speechLength < SID_MIN_LEN_TO_PROCESS && !suppressTooShort)
00146             fprintf(pFileHandle, " -inf");
00147           else
00148             fprintf(pFileHandle, " %.3f", pScores[i]);
00149           break;
00150           
00151         case 'n':
00152           if (speechLength < SID_MIN_LEN_TO_PROCESS && !suppressTooShort)
00153             fprintf(pFileHandle, " 0.000");
00154           else
00155             fprintf(pFileHandle, " %.3f", RescaleScore(pScores[i], sharpness));
00156           break;
00157           
00158         case 'l':
00159           fprintf(pFileHandle, " %.3f", speechLength);
00160           break;
00161           
00162         case 'r':
00163           fprintf(pFileHandle, " %.3f", recordLength);
00164           break;
00165       }
00166     }
00167     
00168     if (speechLength < SID_MIN_LEN_TO_PROCESS && !suppressTooShort)
00169       fprintf(pFileHandle, " %s", SID_BELOW_MIN_LEN_TEXT);
00170     
00171     fprintf(pFileHandle, "\n");
00172   }
00173 }
00174 
00175 // The line format is: input_file_name score1 score2 ... scoreN
00176 // In addition, the first line will contain names of all speakers
00177 void DumpRowWise(const char *pInputName, char **ppNames, float *pScores, int nSpeakers, 
00178   float speechLength, float sharpness, FILE *pFileHandle, bool suppressTooShort)
00179 {
00180   static bool first_time = true;
00181   
00182   if (first_time)
00183   {
00184     if (nSpeakers > 0)
00185       fprintf(pFileHandle, "%s", ppNames[0]);
00186     for (int i = 1; i < nSpeakers; i++)
00187       fprintf(pFileHandle, " %s", ppNames[i]);
00188     fprintf(pFileHandle, "\n");
00189     first_time = false;
00190   }
00191 
00192   fprintf(pFileHandle, "%s", pInputName);
00193   for (int i = 0; i < nSpeakers; i++)
00194   {
00195     if (speechLength < SID_MIN_LEN_TO_PROCESS && !suppressTooShort)
00196       fprintf(pFileHandle, " 0.000");
00197     else
00198       fprintf(pFileHandle, " %.3f", RescaleScore(pScores[i], sharpness));
00199   }
00200   
00201   if (speechLength < SID_MIN_LEN_TO_PROCESS && !suppressTooShort)
00202     fprintf(pFileHandle, " %s", SID_BELOW_MIN_LEN_TEXT);
00203     
00204   fprintf(pFileHandle, "\n");
00205 }
00206 
00207 // This function outputs the winning speaker or scores for all of them
00208 bool DumpScore(const char *pInputName, SScoresI *pScores, bool dumpAllScores, 
00209   const char *pColumnFmt, float sharpness, FILE *pFileHandle, bool suppressTooShort)
00210 {
00211   /* !!! this should be moved to scores
00212   SWaveformFormatConvertorI *pwfconv = psid->GetWaveformFormatConvertor();
00213   if (!pwfconv)
00214     return false;
00215   */
00216 
00217   float speech_length = pScores->GetTestLength();
00218   float record_length = 0;
00219  
00220   
00221   if (dumpAllScores)
00222   {
00223     int num = pScores->GetNScores();
00224     char **ppnames = pScores->GetNames();
00225     float *pscores = pScores->GetScores();
00226 
00227     // the functions above can return 0 in case of an error
00228     if (!ppnames || !pscores) 
00229       return false;
00230     
00231     // 0th array element is for UBM, skip it
00232     if (pColumnFmt)
00233     {
00234       DumpColWise(pInputName, ppnames, pscores, num, record_length, 
00235         speech_length, sharpness, pColumnFmt, pFileHandle, suppressTooShort);
00236     }
00237     else
00238     {
00239       DumpRowWise(pInputName, ppnames, pscores, num, speech_length, 
00240         sharpness, pFileHandle, suppressTooShort);
00241     }
00242   }
00243   else
00244   {
00245     float score = 0.0f;
00246     char *pname = pScores->GetBestName(&score);
00247     
00248     if (!pname) 
00249       return false;
00250     
00251     DumpColWise(pInputName, &pname, &score, 1, record_length, 
00252       speech_length, sharpness, (pColumnFmt ? pColumnFmt : "n"), pFileHandle, suppressTooShort);
00253   }
00254   
00255   return true;
00256 }
00257 
00258 #ifdef PROCESS_WAVEFORM
// Loads a whole file into a newly allocated memory block.
//   pFile - path of the file to load; 0 makes the function fail
//   pLen  - if not 0, receives the number of bytes loaded; it is written only on success
// Returns the buffer (the file content followed by one terminating '\0' byte) or 0 on any
// error: the file can not be opened, its size can not be determined or does not fit
// into an int, the allocation fails or the file can not be read completely.
// The caller releases the buffer with delete [].
char *LoadFileToMem(const char *pFile, int *pLen)
{
  if(!pFile)
    return 0;

  // open the file
  FILE *pf = fopen(pFile, "rb");
  if(!pf) 
    return 0;

  // get file length; ftell returns -1 on failure, which must not be used as a size
  long length = -1;
  if(fseek(pf, 0, SEEK_END) == 0)
    length = ftell(pf);

  // the length is reported to the caller as an int, so it has to survive the round trip
  const bool fits_int = (length >= 0) && (static_cast<long>(static_cast<int>(length)) == length);
  if(!fits_int || fseek(pf, 0, SEEK_SET) != 0)
  {
    fclose(pf);
    return 0;
  }

  const size_t size = static_cast<size_t>(length);

  // one extra byte for the terminating zero
  char *pbuffer = new (std::nothrow) char [size + 1];
  if(!pbuffer) 
  {
    fclose(pf);
    return 0;
  }

  const size_t nbytes = fread(pbuffer, 1, size, pf);
  fclose(pf);
  if(nbytes != size) 
  {
    delete [] pbuffer;
    return 0;
  }

  pbuffer[size] = '\0';
  if(pLen)
    *pLen = static_cast<int>(size);
  return pbuffer;
}
00293 #endif
00294 
00295 int main(int argc, char *argv[])
00296 {
00297   // initial configuration variables
00298   const char *pconfig_file     = 0;
00299   const char *pmodel_dir       = 0;
00300   const char *pinput_file      = 0;
00301   const char *plist_file       = 0;
00302   const char *pinput_dir       = 0;
00303   const char *pwave_fmt        = SID_DEF_WAVE_FMT;
00304   const char *pwave_ext        = SID_DEF_WAVE_EXT;
00305   const char *poutput_file     = 0;
00306   const char *pcolumn_fmt      = 0;
00307   const char *pspeaker_name    = 0;
00308   const char *pactive_speakers = 0;
00309   const char *pin_calib_file   = 0;
00310   const char *pout_calib_file  = 0;
00311   bool training_mode           = false;
00312   bool dump_all_scores         = false;
00313   int  nchannels               = SID_DEF_NCHANNELS;
00314   float sharpness              = SID_DEF_SCORE_SHARPNESS;
00315   float acwf_start             = 0;
00316   float acwf_len               = 0;
00317   bool supp_too_short          = false;
00318 
00319   // command line parsing
00320   if(argc == 1)
00321   {
00322     help();
00323     return 0;
00324   }
00325 
00326   optind = 0;
00327   while (1)
00328   {
00329     int c = getopt(argc, argv, const_cast<char *>("c:m:i:l:d:e:n:w:s:a:o:trf:vg:k:y:p:z:u"));
00330     if(c == -1)
00331       break;
00332 
00333     switch(c)
00334     {
00335       case 'c':
00336         pconfig_file = optarg;
00337         break;
00338       case 'm':
00339         pmodel_dir = optarg;
00340         break;
00341       case 'i':
00342         pinput_file = optarg;
00343         break;
00344       case 'l':
00345         plist_file = optarg;
00346         break;
00347       case 'd':
00348         pinput_dir = optarg;
00349         break;
00350       case 'w':
00351         pwave_fmt = optarg;
00352         break;
00353       case 'e':
00354         pwave_ext = optarg;
00355         break;
00356       case 'n':
00357         if(sscanf(optarg, "%d", &nchannels) != 1 || nchannels < 1)
00358         {
00359           fprintf(stderr, "ERROR: Invalid number of channels: %s.\n", optarg);
00360           return 1;
00361         }
00362         break;
00363       case 's':
00364         poutput_file = optarg;
00365         break;
00366       case 'u':
00367         supp_too_short = true;
00368         break;
00369       case 'v':
00370         gErrorHandler.SetVerbose(true);
00371         break;
00372       case 't':
00373         training_mode = true;
00374         break;     
00375       case 'r':
00376         dump_all_scores = true;
00377         break;
00378       case 'f':
00379         pcolumn_fmt = optarg;
00380         if (strspn(pcolumn_fmt, SID_OUT_COLUMN_CHARS) != strlen(pcolumn_fmt))
00381         {
00382           fprintf(stderr, "ERROR: Wrong format of output columns string. "
00383           "The set of allowed characters is '%s'.\n", SID_OUT_COLUMN_CHARS);
00384           return 1;
00385         }
00386         break;
00387       case 'z':
00388         if(sscanf(optarg, "%f", &sharpness) != 1)
00389         {
00390           fprintf(stderr, "ERROR: Wrong value of score sharpness '%s'. Must be positive number.\n", optarg);
00391           return 1;
00392         }
00393         break;
00394       case 'g':
00395         pspeaker_name = optarg;
00396         break;
00397       case 'a':
00398         pactive_speakers = optarg;
00399         break;
00400       case 'k':
00401         pin_calib_file = optarg;
00402         break;
00403       case 'y':
00404         pout_calib_file = optarg;
00405         break;
00406       case 'p':
00407         if(sscanf(optarg, "%f,%f", &acwf_start, &acwf_len) != 2)
00408         {
00409           fprintf(stderr, "ERROR: Can not parse active waveform part '%s'\n", optarg);
00410           return 1;
00411         }
00412         break;
00413       case '?':
00414         fprintf(stderr, "ERROR: Command line parsing error.\n");
00415         return 1;
00416       default :
00417         fprintf(stderr, "ERROR: Command line parsing error. Unexpected argument '%s'.\n", optarg);
00418         return 1;
00419     }
00420   }
00421 
00422   // register license file for SSpeakerID2I
00423   SLicenseManagerI *plicman = BSAPIGetLicenseManager();
00424   if (plicman)
00425   {
00426     plicman->SetErrorHandler(&gErrorHandler);
00427     plicman->RegisterLicenseFile("license.dat");
00428   }
00429 
00430   // ask for an instance of speaker identification system
00431   SSpeakerID2I *psid = static_cast<SSpeakerID2I *>(BSAPICreateInstance(SIID_SPKID2));
00432   if(!psid)
00433   {
00434     return 1;
00435   }
00436 
00437   // tell the instance where to send error messages
00438   psid->SetErrorHandler(&gErrorHandler);
00439   
00440   // read configuration file and configure the instance
00441   char pdefault_cfg[1024];
00442   sprintf(pdefault_cfg, "settings%sconfig", DIRSEP);  // Visual C does not have snprintf
00443   if(!psid->Init((pconfig_file ? pconfig_file : pdefault_cfg)))
00444   {
00445     psid->Release();
00446     return 1;
00447   }
00448   
00449   // Set a directory saving models of speakers.
00450   // If it is not set, a default one (pre-set in config file) is used.
00451   if(pmodel_dir && !psid->SetModelDirectory(pmodel_dir))
00452   {
00453     psid->Release();
00454     return 1;
00455   }
00456   
00457   // It is also possible to configure the waveform source.
00458   // If there are multiple channels, the channels are concatenated and just one decision made.
00459   SWaveformFormatConvertorI *pwc = psid->GetWaveformFormatConvertor();
00460   if (pwc)
00461   {
00462     pwc->SetNChannels(nchannels);
00463     pwc->SetInputFormatStr(pwave_fmt);
00464   }
00465   
00466   // Get calibration object if the calibration is turned on
00467   SUserCalibrationI *pcalib = 0;
00468   if (pin_calib_file || pout_calib_file)
00469   {
00470     SBlockSetI *pbset = psid->GetBlockSet();
00471     if(!pbset)
00472     {
00473       psid->Release();
00474       return 1;
00475     }
00476   
00477     pcalib = static_cast<SUserCalibrationI *>(pbset->GetBlock("user_calibration"));
00478     if(!pcalib)
00479     {
00480       psid->Release();
00481       return 1;
00482     }
00483   }
00484 
00485   // Train speaker models. 
00486   // There are more possibilities of input: memory block, file, list of files, directory.
00487   // The training mode is enabled when one of AddXXX functions is entred first time.
00488   // There is possibility to train multiple speakers together. This could be requested in future for discriminative training.
00489   if(training_mode)
00490   {
00491     // verify that we have the speaker name
00492     if((pinput_file || pinput_dir) && !pspeaker_name)
00493     {
00494       fprintf(stderr, "ERROR: Training using one file or directory without knowing speaker name. Please set -g speaker\n");
00495       psid->Release();
00496       return 1;
00497     }
00498 
00499     // load calibration parameters if requested
00500     if (pin_calib_file)
00501     {
00502       assert(pcalib);
00503       pcalib->SetEnabled(true);
00504       if (!pcalib->Load(pin_calib_file))
00505       {
00506         psid->Release();
00507         return 1;
00508       }
00509     }
00510 
00511     // Get number of requested training iterations. This will return value one very likely, but the iterative
00512     // training procedure was chosen for the API to allow more difficult (discriminative) training procedures.
00513     // in future.
00514     int nreq_iters = psid->GetNRequestedTrainingIters();
00515 
00516     int i;
00517     for(i = 0; i < nreq_iters; i++)
00518     {
00519 
00520       // it is not necessary to call StartTrainingIteration if all the signal modelling subsystems 
00521       // are known to request just one iteration
00522       if(!psid->StartTrainingIteration())
00523       {
00524         psid->Release();
00525         return 1;
00526       }
00527 
00528 #ifdef PROCESS_WAVEFORM
00529       // This is for demonstration purposes only! MS WAVE files can not
00530       // be processed correctly in this simplified case because AddWaveform()
00531       // accepts raw waveform data only (without any header)!
00532       if(pinput_file)
00533       {
00534         int nbytes = 0;
00535         char *pwaveform = LoadFileToMem(pinput_file, &nbytes);
00536         if(!pwaveform)
00537         {
00538           psid->Release();
00539           return 1;
00540         }
00541         
00542         bool result = psid->AddWaveform(pspeaker_name, pwaveform, nbytes);
00543         delete [] pwaveform;
00544         if(!result)
00545         {
00546           psid->Release();
00547           return 1;
00548         }
00549       }
00550 #else
00551       // the input is one file
00552       if(pinput_file && !psid->AddFile(pspeaker_name, pinput_file))
00553       {
00554         psid->Release();
00555         return 1;
00556       }
00557 #endif
00558 
00559       // The input is listfile. Each line of the file has two columns - speaker name and waveform file.
00560       // The list can be also in memory (AddFilesFromMemList).
00561       if(plist_file && !psid->AddFilesFromListFile(plist_file))
00562       {
00563         psid->Release();
00564         return 1;
00565       }
00566 
00567       // the input is directory
00568       if(pinput_dir && !psid->AddFilesFromDirectory(pspeaker_name, pinput_dir, pwave_ext))
00569       {
00570         psid->Release();
00571         return 1;
00572       }
00573     }
00574 
00575     // It is enough to call FinishTraining when all speech files/segments are added.
00576     // The system will estimate new models. The models are ready to use in the current instance
00577     // and also saved to the actual model directory for next use.
00578     if(!psid->FinishTraining())
00579     {
00580       psid->Release();
00581       return 1;
00582     }
00583     
00584 /*    if (pspeaker_name && (psid->GetTrainingLength(pspeaker_name) < SID_MIN_LEN_TO_PROCESS) && !supp_too_short)
00585     {
00586       fprintf(stderr, "WARNING: Training record(s) contain only %.3f seconds of speech. "
00587         "At least %f seconds are needed to obtain significant results.\n",
00588         psid->GetTrainingLength(pspeaker_name), SID_MIN_LEN_TO_PROCESS);
00589     }*/
00590   }
00591   else if (pout_calib_file)
00592   {
00593     assert(pcalib);
00594 
00595     // Start calibration estimation
00596     pcalib->StartEstimation();
00597 
00598     // Set active waveform part. This can significantly speed up identification if the records are long.
00599     if(!psid->SetActiveWaveformPart(acwf_start, acwf_len))
00600     {
00601       psid->Release();
00602       return 1;
00603     }
00604 
00605     // the input is one file
00606     if(pinput_file)
00607     {
00608       gErrorHandler.LogMessage("Processing file: %s", pinput_file);
00609       fprintf(stderr, "WARNING: Calibration estimated on just one file is not significant.\n");
00610       if(!psid->TestFile(pinput_file))
00611       {
00612         psid->Release();
00613         return 1;
00614       }
00615     }
00616 
00617     // the input is listfile
00618     if(plist_file)
00619     {
00620       SFileListI *plist = static_cast<SFileListI *>(BSAPICreateInstance(SIID_FILELIST));
00621       if(!plist)
00622       {
00623         fprintf(stderr, "Memory allocation error.");
00624         psid->Release();
00625         return 1;
00626       }
00627       plist->SetErrorHandler(&gErrorHandler);
00628       if(!plist->AddList(plist_file))
00629       {
00630         plist->Release();
00631         psid->Release();
00632         return 1;
00633       }
00634 
00635       plist->FirstLine();
00636       char ptarget[1024];
00637       char psource[1024];
00638       int start;
00639       int end;
00640       float prob;
00641       while(plist->GetLine(ptarget, psource, &start, &end, &prob))
00642       {
00643         gErrorHandler.LogMessage("Processing file: %s", psource);
00644         if(!psid->TestFile(psource))
00645         {
00646           plist->Release();
00647           psid->Release();
00648           return 1;
00649         }
00650       }
00651       plist->Release();
00652     }
00653 
00654     // the input is a directory
00655     // The FileSniffer can monitor a directory for incoming files. Here, the dictionary is searched just once.
00656     if(pinput_dir)
00657     {
00658       SFileSnifferI *psniffer = static_cast<SFileSnifferI *>(BSAPICreateInstance(SIID_FILESNIFFER));
00659       if(!psniffer)
00660       {
00661         fprintf(stderr, "Memory allocation error.");
00662         psid->Release();
00663         return 1;
00664       }
00665       psniffer->SetErrorHandler(&gErrorHandler);
00666       psniffer->AddDirectory(pinput_dir);
00667       psniffer->AddWantedSuffix(pwave_ext);
00668 
00669       if(!psniffer->FirstFile())
00670       {
00671         psniffer->Release();
00672         psid->Release();
00673         return 1;
00674       }
00675       
00676       char psource[1024];
00677       while(psniffer->GetFile(psource, sizeof(psource) - 1))
00678       {
00679         gErrorHandler.LogMessage("Processing file: %s", psource);
00680         if(!psid->TestFile(psource))
00681         {
00682           psniffer->Release();
00683           psid->Release();
00684           return 1;
00685         }
00686       }
00687       psniffer->Release();
00688     }
00689 
00690     // All files has been processed, end calibration estimation
00691     pcalib->EndEstimation();
00692 
00693     // Save calibration parameters
00694     if (!pcalib->Save(pout_calib_file))
00695     {
00696       psid->Release();
00697       return 1;
00698     }
00699   }
00700   else
00701   {
00702     // Scoring files.
00703 
00704     // By default, the scores are sent from the speaker identification system using a callback function.
00705     // It is possible attach a score memory that remembers the last scores and enables to access them any time
00706     // by the GetScores function.
00707     SScoreMemoryI *pscore_mem = static_cast<SScoreMemoryI *>(BSAPICreateInstance(SIID_SCOREMEMORY));
00708     if(!pscore_mem)
00709     {
00710       psid->Release();
00711       return 1;
00712     }
00713     pscore_mem->SetErrorHandler(&gErrorHandler);
00714     psid->SetTarget(pscore_mem);
00715 
00716     // Set active waveform part. This can significantly speed up identification if the records are long.
00717     if(!psid->SetActiveWaveformPart(acwf_start, acwf_len))
00718     {
00719       psid->Release();
00720       pscore_mem->Release();
00721       return 1;
00722     }
00723 
00724     // It is necesary to activate some or all models before scoring.
00725     if(pactive_speakers)
00726     {
00727       if (!psid->ActivateModels(pactive_speakers))
00728       {
00729         psid->Release();
00730         pscore_mem->Release();
00731         return 1;
00732       }
00733     }
00734     else
00735     {
00736       if (!psid->ActivateAllModels())
00737       {
00738         psid->Release();
00739         pscore_mem->Release();
00740         return 1;
00741       }
00742     }
00743 
00744     // load calibration parameters if requested
00745     if (pin_calib_file)
00746     {
00747       assert(pcalib);
00748       pcalib->SetEnabled(true);
00749       if (!pcalib->Load(pin_calib_file))
00750       {
00751         psid->Release();
00752         pscore_mem->Release();
00753         return 1;
00754       }
00755     }
00756 
00757     // Just one speech file or memory block can be scored. But the library have another possibilities to simplify 
00758     // processing of listfiles and directories
00759 
00760     // open output file
00761     FILE *pf_out = stdout;
00762     if(poutput_file)
00763     {
00764       pf_out = fopen(poutput_file, "w");
00765       if(!pf_out)
00766       {
00767         fprintf(stderr, "ERROR: Can not open output score file '%s'.", poutput_file);
00768         psid->Release();
00769         pscore_mem->Release();
00770         return 1;
00771       }
00772     }
00773 
00774     // the input is one file
00775     if(pinput_file)
00776     {
00777       gErrorHandler.LogMessage("Processing file: %s", pinput_file);
00778 #ifdef PROCESS_WAVEFORM
00779       // This is for demonstration purposes only! MS WAVE files can not
00780       // be processed correctly in this simplified case because TestWaveform()
00781       // accepts raw waveform data only (without any header)!
00782       int nbytes = 0;
00783       char *pwaveform = LoadFileToMem(pinput_file, &nbytes);
00784       if(!pwaveform)
00785       {
00786         psid->Release();
00787         pscore_mem->Release();
00788         return 1;
00789       }
00790 
00791       bool result = psid->TestWaveform(pwaveform, nbytes);
00792       delete [] pwaveform;
00793       if(!result)
00794 #else
00795       if(!psid->TestFile(pinput_file))
00796 #endif
00797       {
00798         psid->Release();
00799         pscore_mem->Release();
00800         return 1;
00801       }
00802       if(!pscore_mem->Empty())
00803         DumpScore(pinput_file, pscore_mem->GetScores(), dump_all_scores, pcolumn_fmt, sharpness, pf_out, supp_too_short);
00804     }
00805 
00806     // the input is listfile
00807     if(plist_file)
00808     {
00809       SFileListI *plist = static_cast<SFileListI *>(BSAPICreateInstance(SIID_FILELIST));
00810       if(!plist)
00811       {
00812         fprintf(stderr, "Memory allocation error.");
00813         psid->Release();
00814         pscore_mem->Release();
00815         return 1;
00816       }
00817       plist->SetErrorHandler(&gErrorHandler);
00818       if(!plist->AddList(plist_file))
00819       {
00820         plist->Release();
00821         psid->Release();
00822         pscore_mem->Release();
00823         return 1;
00824       }
00825 
00826       plist->FirstLine();
00827       char ptarget[1024];
00828       char psource[1024];
00829       int start;
00830       int end;
00831       float prob;
00832       while(plist->GetLine(ptarget, psource, &start, &end, &prob))
00833       {
00834         gErrorHandler.LogMessage("Processing file: %s", psource);
00835         if(!psid->TestFile(psource))
00836         {
00837           plist->Release();
00838           psid->Release();
00839           pscore_mem->Release();
00840           return 1;
00841         }
00842         if(!pscore_mem->Empty())
00843           DumpScore(psource, pscore_mem->GetScores(), dump_all_scores, pcolumn_fmt, sharpness, pf_out, supp_too_short);
00844       }
00845       plist->Release();
00846     }
00847 
00848     // the input is a directory
00849     // The FileSniffer can monitor a directory for incoming files. Here, the dictionary is searched just once.
00850     if(pinput_dir)
00851     {
00852       SFileSnifferI *psniffer = static_cast<SFileSnifferI *>(BSAPICreateInstance(SIID_FILESNIFFER));
00853       if(!psniffer)
00854       {
00855         fprintf(stderr, "Memory allocation error.");
00856         psid->Release();
00857         pscore_mem->Release();
00858         return 1;
00859       }
00860       psniffer->SetErrorHandler(&gErrorHandler);
00861       psniffer->AddDirectory(pinput_dir);
00862       psniffer->AddWantedSuffix(pwave_ext);
00863 
00864       if(!psniffer->FirstFile())
00865       {
00866         psniffer->Release();
00867         psid->Release();
00868         pscore_mem->Release();
00869         return 1;
00870       }
00871       
00872       char psource[1024];
00873       while(psniffer->GetFile(psource, sizeof(psource) - 1))
00874       {
00875         gErrorHandler.LogMessage("Processing file: %s", psource);
00876         if(!psid->TestFile(psource))
00877         {
00878           psniffer->Release();
00879           psid->Release();
00880           pscore_mem->Release();
00881           return 1;
00882         }
00883         if(!pscore_mem->Empty())
00884           DumpScore(psource, pscore_mem->GetScores(), dump_all_scores, pcolumn_fmt, sharpness, pf_out, supp_too_short);
00885       }
00886       psniffer->Release();
00887     }
00888 
00889     // close output file
00890     if(pf_out != stdout)
00891       fclose(pf_out);
00892 
00893     pscore_mem->Release();
00894   }
00895 
00896   // release the SID instance
00897   psid->Release();
00898 
00899   return 0;
00900 }

Generated on Wed Apr 11 10:00:17 2012 for BSAPI by  doxygen 1.4.7