offlinerec3.cpp

This example shows how to use the SOfflineSpeechRecognizer3I interface.

00001 /*****************************************************************
00002  *  BSAPI LVCSR Example                                          *
00003  *                                                               *
00004  *  Author      : Petr Schwarz, Pavel Matejka,                   *
00005  *                Igor Szoke, Tomas Cipr                         *
00006  *  Copyright   : (C) 2006-2011 by Phonexia s.r.o                *
00007  *                                                               *
00008  *  For more info, please contact us at support@phonexia.com     *
00009  *****************************************************************/
00010 
00011 #include "bsapi.h"
00012 #include <stdio.h>
00013 #include <string.h>
00014 #include <stdarg.h>
00015 #include <time.h>
00016 
00017 #include "stimer.h"
00018 #include "getopt.h"
00019 
00020 #if _MSC_VER
00021 #define snprintf _snprintf
00022 #endif
00023 
00024 // default option values
00025 #define SID_DEF_IN_DATA_TYPE      "waveform"
00026 #define SID_DEF_FEA_FMT           "htk"
00027 
// File-name helpers (defined further below).  They edit or fill the
// caller-supplied buffers in place and take no buffer size, so the caller is
// responsible for providing enough room for the resulting string.
static void change_file_path(char *pFileName, const char *pNewPath);
static void change_file_suffix(char *pFileName, const char *pNewSuffix);
static void get_file_path(const char *pFileName, char *pRetPath);
static void get_file_suffix(const char *pFileName, char *pRetSuffix);
static bool rename_file(const char *pOldFile, const char *pNewFile);
00034 
00035 static bool process_file(
00036   const char *pInputFile,
00037   const char *pOutputFile,
00038   const char *pOutputDir,
00039   const char *pTransSuffix,
00040   bool moveInputFile,
00041   SOfflineSpeechRecognizer3I *pSpeechRec);
00042 
00043 class ErrorHandler : public SErrorCallbackI 
00044 {
00045   public:
00046     ErrorHandler() : mVerbose(false) {}
00047     
00048     virtual void BSAPI_METHOD OnTextMessage(SUnknownI *pSender, message_type type, unsigned int messageId, const char *pMessage)
00049     {
00050       unsigned int iid = pSender ? pSender->GetIID() : SIID_UNDEFINED;
00051       switch(type)
00052       {
00053         case mtError:
00054           fprintf(stderr, "ERROR: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00055           break;
00056         case mtWarning:
00057           fprintf(stderr, "WARNING: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);       
00058           break;
00059         case mtLog:
00060           LogMessage(pMessage);
00061          break;
00062       }
00063     }
00064     
00065     void LogMessage(const char *pMessage, ...)
00066     {
00067       if (mVerbose)
00068       {
00069         va_list ap;
00070         va_start(ap, pMessage);
00071         vfprintf(stderr, pMessage, ap);
00072         fprintf(stderr, "\n");
00073         va_end(ap);
00074       }
00075     }
00076     
00077     void SetVerbose(bool verbose) {mVerbose = verbose;}
00078   
00079   protected:
00080     bool mVerbose;
00081 } gErrorHandler;
00082 
00083 class SStateHandler : public SOfflineSpeechRecCallbackI
00084 {
00085   public:
00086     SStateHandler() : mSaveLog(false), mpLogSuffix(0), mStartTime(-1), mStopTime(-1) {}
00087 
00088     bool BSAPI_METHOD OnFileStarted(const char *pInputFile, const char *pOutputFile)
00089     {
00090       gErrorHandler.LogMessage("Processing file '%s'", pInputFile);
00091       time(&mStartTime);
00092       mProcessingTimer.Start();
00093       return true;
00094     }
00095 
00096     bool BSAPI_METHOD OnSegmentStarted(long_long segmentStart, long_long segmentEnd, int channel, const char *pTranscriptionFile)
00097     {
00098       gErrorHandler.LogMessage("  segment [%d, %d]", static_cast<int>(segmentStart / 100000), static_cast<int>(segmentEnd / 100000));
00099       return true;
00100     }
00101 
00102     bool BSAPI_METHOD OnTranscription(SUnknownI *pSender, STranscriptionI *pTrans)
00103     {
00104       return true;
00105     }
00106 
00107     bool BSAPI_METHOD OnSegmentDone(long_long segmentStart, long_long segmentEnd, int channel, const char *pTranscriptionFile, bool isValid)
00108     {
00109       return true;
00110     }
00111 
00112     bool BSAPI_METHOD OnFileDone(const char *pInputFile, const char *pOutputFile)
00113     {
00114       mProcessingTimer.Stop();
00115       time(&mStopTime);
00116 
00117       if (mSaveLog)
00118         SaveLog(pInputFile, pOutputFile);
00119 
00120       return true;
00121     }
00122 
00123     void SetSaveLog(bool saveLog) {mSaveLog = saveLog;}
00124     void SetLogSuffix(const char *pLogSuffix) {mpLogSuffix = pLogSuffix;}
00125 
00126   protected:
00127     bool SaveLog(const char *pInputFile, const char *pOutputFile)
00128     {
00129       char plog_file_name[1024];
00130       plog_file_name[sizeof(plog_file_name) - 1] = '\0';
00131       strncpy(plog_file_name, pOutputFile, sizeof(plog_file_name) - 1);
00132       change_file_suffix(plog_file_name, mpLogSuffix);
00133 
00134       FILE *plog_file = fopen(plog_file_name, "a");
00135       if (!plog_file)
00136       {
00137         fprintf(stderr, "ERROR: Can not open log file '%s'.\n", plog_file_name);
00138         return false;
00139       }
00140 
00141       if (ftell(plog_file) != 0)
00142         fputc('\n', plog_file);
00143 
00144       fprintf(plog_file, "[lvcsr_software]\n");
00145       fprintf(plog_file, "bsapi_release=%s\n", BSAPIVersion());
00146 
00147       fputc('\n', plog_file);
00148       fprintf(plog_file, "[processing_info]\n");
00149       fprintf(plog_file, "input_file=%s\n", pInputFile);
00150       fprintf(plog_file, "output_file=%s\n", pOutputFile);
00151       fprintf(plog_file, "processing_start=%s", ctime(&mStartTime));
00152       fprintf(plog_file, "processing_stop=%s", ctime(&mStopTime));
00153       fprintf(plog_file, "duration_sec=%.3f\n", mProcessingTimer.GetInterval());
00154 
00155       fclose(plog_file);
00156       return true;
00157     }
00158 
00159   protected:
00160     bool mSaveLog;
00161     const char *mpLogSuffix;
00162     time_t mStartTime;
00163     time_t mStopTime;
00164     SSimpleTimer mProcessingTimer;
00165 
00166 } gStateHandler;
00167 
00168 class SFileSnifferCallback : public SFileSnifferCallbackI
00169 {
00170   public:
00171     SFileSnifferCallback() : 
00172       mpSpeechRec(0),
00173       mpOutputDir(0),
00174       mpTransSuffix(0),
00175       mMoveInputToOutput(0),
00176       mResult(true)
00177     {
00178     }
00179 
00180     ~SFileSnifferCallback() {}
00181 
00182     bool BSAPI_METHOD OnFile(const char *pFile)
00183     {
00184       mResult &= process_file(pFile, 0, mpOutputDir, mpTransSuffix, mMoveInputToOutput, mpSpeechRec);
00185       return true;
00186     }
00187 
00188     bool BSAPI_METHOD OnStateChanged(state_type state)
00189     {
00190       switch (state)
00191       {
00192         case stLoopStarted: mResult = true; break;
00193         case stWaitForData: printf("Waiting for input files... Press ENTER to stop.\n"); break;
00194         case stTerminated:  break;
00195       }
00196       return true;
00197     }
00198 
00199     void SetSpeechRec(SOfflineSpeechRecognizer3I *pSpeechRec)  {mpSpeechRec = pSpeechRec;}
00200     void SetOutputDir(const char *pOutputDir)                 {mpOutputDir = pOutputDir;}
00201     void SetTransSuffix(const char *pTransSuffix)             {mpTransSuffix = pTransSuffix;}
00202     void SetMoveInputToOutput(bool moveInputToOutput)         {mMoveInputToOutput = moveInputToOutput;}
00203     bool GetResult()                                          {return mResult;}
00204 
00205   protected:
00206     SOfflineSpeechRecognizer3I *mpSpeechRec;
00207     const char *mpOutputDir;
00208     const char *mpTransSuffix;
00209     bool mMoveInputToOutput;
00210     bool mResult;
00211 
00212 } gFileSnifferHandler;
00213 
00214 
00215 void help(const char *pProgName)
00216 {
00217   puts("\n Offline speech recognizer                                             ");
00218   printf(" %s\n", BSAPIVersion());
00219   puts(" ======================================================================= ");
00220   puts("                                                                         ");
00221   printf(" USAGE: %s [options]\n", pProgName);
00222   puts("                                                                         ");
00223   puts(" system configuration:                                                   ");
00224   puts("   -c file         config file                                           ");
00225   puts("                                                                         ");
00226   puts(" input from single file:                                                 ");
00227   puts("   -i file         input waveform or features file                       ");
00228   puts("   -j str [waveform] input data type (waveform, features)                ");
00229   puts("   -o file         output transcription file                             ");
00230   puts("                                                                         ");
00231   puts(" input data type = features:                                             ");
00232   puts("   -u fmt [htk]    feature format (ascii, binary, htk)                   ");
00233   puts("                                                                         ");
00234   puts(" input from list file:                                                   ");
00235   puts("   -l file         list of input [and output files]                      ");
00236   puts("                                                                         ");
00237   puts(" input from directory:                                                   ");
00238   puts("   -r dir          input directory                                       ");
00239   puts("   -e str [wav]    extensions of audio files (comma separated)           ");
00240   puts("   -a              input directory auto-scan mode                        ");
00241   puts("                                                                         ");
00242   puts(" output specification:                                                   ");
00243   puts("   -f type         list of output types (comma separated)                ");
00244   puts("                   str - one best string                                 ");
00245   puts("                   lat - lattice (graph with alternative sentences)      ");
00246   puts("                   cn  - confusion network (graph with alternative words)");
00247   puts("   -b flags        format flags for transctiption strings                ");
00248   puts("                     TXT:SEn      use pure text format                   ");
00249   puts("                       S,E include start/end time                        ");
00250   puts("                       n   deletes the auxiliary symbols                 ");
00251   puts("                     LBF:PSEl     use HTK label format                   ");
00252   puts("                       S,E include start/end time                        ");
00253   puts("                       l   include word likelihood                       ");
00254   puts("                       P   include word confidence                       ");
00255   puts("                     FST:NWPSEl   use graph format                       ");
00256   puts("                       N   include graph nodes                           ");
00257   puts("                       W   include word identification                   ");
00258   puts("                       S,E include start/end time                        ");
00259   puts("                       l   include word likelihood                       ");
00260   puts("                       P   include word confidence                       ");
00261   puts("   -h flags        format flags for lattices NWPpSELAl                   ");
00262   puts("                       N   include graph nodes                           ");
00263   puts("                       W   include word identification                   ");
00264   puts("                       P   include word posterior probability            ");
00265   puts("                       p   include word posterior probability in log     ");
00266   puts("                       S,E include start/end time                        ");
00267   puts("                       A   include word acoustic likelihood              ");
00268   puts("                       L   include word language likelihood              ");
00269   puts("                       l   include word total likelihood                 ");
00270   puts("   -k flags        format flags for confusion networks TNWPpSE           ");
00271   puts("                       T   include time slot                             ");
00272   puts("                       N   include graph nodes                           ");
00273   puts("                       W   include word identification                   ");
00274   puts("                       P   include word posterior probability            ");
00275   puts("                       p   include word posterior probability in log     ");
00276   puts("                       S,E include start/end time                        ");
00277   puts("   -n num [0]      maximal number of parallel words in CN (0 means inf)  ");
00278   puts("   -p num [-70]    minimal posterior probability of word in log for CN   ");
00279   puts("   -t str [default] time format (htk, frames, s, ms, hms, dhms, default) ");
00280   puts("                      htk     - time in htk time units = 100 nanoseconds ");
00281   puts("                      frames  - time in number of frames = 10 milliseconds");
00282   puts("                      s       - time in seconds                          ");
00283   puts("                      ms      - time in format minutes:seconds           ");
00284   puts("                      hms     - hours:minutes:seconds                    ");
00285   puts("                      dhms    - days hours:minutes:seconds               ");
00286   puts("                      default - htk for str LBF output type, s for others");
00287   puts("   -s str [trn]    one best string file suffix                           ");
00288   puts("   -y str [lat]    lattice file suffix                                   ");
00289   puts("   -z str [cn]     confusion network file suffix                         ");
00290   puts("                                                                         ");
00291   puts(" common options:                                                         ");
00292   puts("   -w fmt [lin16]  waveform format (lin16, lin8, alaw, mulaw)            ");
00293   puts("   -d dir          output directory                                      ");
00294   puts("   -m              move input waveform file to output directory          ");
00295   puts("   -g              save log file                                         ");
00296   puts("   -x str [log]    log file suffix                                       ");
00297   puts("   -v              verbose mode                                          ");
00298   puts("                                                                         ");
00299 }
00300 
00301 
// Replaces the directory part of pFileName with pNewPath, in place.
// The last '/' or '\\' in pFileName and everything after it is kept and
// placed right behind pNewPath.  A name without any separator is left
// unchanged.
// The buffer behind pFileName must have room for pNewPath followed by the
// kept tail; pNewPath must not point into that buffer.
void change_file_path(char *pFileName, const char *pNewPath)
{
  char *pslash = strrchr(pFileName, '/');
  char *pbackslash = strrchr(pFileName, '\\');

  // pick the right-most separator; pointers are ordered only when both
  // were found, a null pointer is never compared relationally
  char *psep_pos = pslash;
  if (pbackslash && (!psep_pos || pbackslash > psep_pos))
    psep_pos = pbackslash;

  if (!psep_pos)
    return;

  // Shift the tail (separator, file name and terminator) to its final place
  // first, then put the new path in front of it.  Working in place avoids a
  // fixed-size temporary that a long name could overflow.
  const size_t path_len = strlen(pNewPath);
  const size_t tail_size = strlen(psep_pos) + 1;
  memmove(pFileName + path_len, psep_pos, tail_size);
  memcpy(pFileName, pNewPath, path_len);
}
00317 
// Replaces the suffix (the text behind the last dot) of pFileName with
// pNewSuffix, in place.  When the file name part contains no dot (no dot at
// all, or the last dot belongs to the directory part), ".<pNewSuffix>" is
// appended instead.
// The buffer behind pFileName must have room for the resulting string.
void change_file_suffix(char *pFileName, const char *pNewSuffix)
{
  char *pdot_pos = strrchr(pFileName, '.');
  const char *pslash = strrchr(pFileName, '/');
  const char *pbackslash = strrchr(pFileName, '\\');

  // right-most path separator; a null pointer is never compared relationally
  const char *psep_pos = pslash;
  if (pbackslash && (!psep_pos || pbackslash > psep_pos))
    psep_pos = pbackslash;

  if (pdot_pos && (!psep_pos || pdot_pos > psep_pos))
  {
    // the dot belongs to the file name: overwrite what follows it
    strcpy(pdot_pos + 1, pNewSuffix);
  }
  else
  {
    // dot is not found or dot is in path
    const size_t name_len = strlen(pFileName);
    pFileName[name_len] = '.';
    strcpy(pFileName + name_len + 1, pNewSuffix);
  }
}
00335 
// Removes the suffix of pFileName including its dot, in place.
// Nothing happens when the file name part contains no dot (no dot at all,
// or the last dot belongs to the directory part).
void cutof_file_suffix(char *pFileName)
{
  char *dot_pos = strrchr(pFileName, '.');
  if (!dot_pos)
    return;

  const char *slash_pos = strrchr(pFileName, '/');
  const char *backslash_pos = strrchr(pFileName, '\\');

  // right-most path separator; a null pointer is never compared relationally
  const char *sep_pos = slash_pos;
  if (backslash_pos && (!sep_pos || backslash_pos > sep_pos))
    sep_pos = backslash_pos;

  if (!sep_pos || dot_pos > sep_pos)
    *dot_pos = '\0';
}
00345 
// Stores the directory part of pFileName (everything in front of the last
// '/' or '\\', the separator itself excluded) into pRetPath.  An empty
// string is stored when pFileName contains no separator.
// pRetPath must have room for the directory part and its terminator.
void get_file_path(const char *pFileName, char *pRetPath)
{
  const char *pslash = strrchr(pFileName, '/');
  const char *pbackslash = strrchr(pFileName, '\\');

  // right-most path separator; a null pointer is never compared relationally
  const char *psep_pos = pslash;
  if (pbackslash && (!psep_pos || pbackslash > psep_pos))
    psep_pos = pbackslash;

  if (!psep_pos)
  {
    pRetPath[0] = '\0';
    return;
  }

  // copy only the directory part instead of the whole name
  const size_t path_len = (size_t)(psep_pos - pFileName);
  memmove(pRetPath, pFileName, path_len);
  pRetPath[path_len] = '\0';
}
00359 
// Stores the suffix of pFileName (the text behind the last dot, the dot
// excluded) into pRetSuffix.  An empty string is stored when the file name
// part contains no dot (no dot at all, or the last dot belongs to the
// directory part).
// pRetSuffix must have room for the suffix and its terminator.
void get_file_suffix(const char *pFileName, char *pRetSuffix)
{
  *pRetSuffix = '\0';

  const char *pdot_pos = strrchr(pFileName, '.');
  if (!pdot_pos)
    return;

  const char *pslash = strrchr(pFileName, '/');
  const char *pbackslash = strrchr(pFileName, '\\');

  // right-most path separator; a null pointer is never compared relationally
  const char *psep_pos = pslash;
  if (pbackslash && (!psep_pos || pbackslash > psep_pos))
    psep_pos = pbackslash;

  if (!psep_pos || pdot_pos > psep_pos)
  {
    strcpy(pRetSuffix, pdot_pos + 1);
  }
}
00371 
// Renames (moves) pOldFile to pNewFile using rename().
// Returns true when rename() reports success (returns 0).
bool rename_file(const char *pOldFile, const char *pNewFile)
{
  const int status = rename(pOldFile, pNewFile);
  if (status != 0)
    return false;
  return true;
}
00376 
00377 bool process_file(
00378   const char *pInputFile,
00379   const char *pOutputFile,
00380   const char *pOutputDir,
00381   const char *pTransSuffix,
00382   bool moveInputFile,
00383   SOfflineSpeechRecognizer3I *pSpeechRec)
00384 {
00385   char poutput_file[1024];
00386   poutput_file[sizeof(poutput_file) - 1] = '\0';
00387 
00388   if (!pOutputFile || strlen(pOutputFile) == 0)
00389   {
00390     strncpy(poutput_file, pInputFile, sizeof(poutput_file) - 1);
00391     if (pOutputDir)
00392      change_file_path(poutput_file, pOutputDir);
00393     if(strlen(pTransSuffix) == 0 || strcmp(pTransSuffix, "") == 0)
00394       cutof_file_suffix(poutput_file);
00395     else
00396       change_file_suffix(poutput_file, pTransSuffix);
00397     pOutputFile = poutput_file;
00398   }
00399 
00400   if (!pOutputDir)
00401   {
00402     char poutput_dir[1024];
00403     poutput_dir[sizeof(poutput_dir) - 1] = '\0';
00404     get_file_path(pOutputFile, poutput_dir);
00405     pSpeechRec->SetOutputDirectory(poutput_dir);
00406   }
00407 
00408   if (!pSpeechRec->ProcessFile(pInputFile, pOutputFile))
00409     return false;
00410 
00411   if (moveInputFile)
00412   {
00413     char psuffix[1024];
00414     psuffix[sizeof(psuffix) - 1] = '\0';
00415     get_file_suffix(pInputFile, psuffix);
00416 
00417     if (poutput_file != pOutputFile)
00418       strncpy(poutput_file, pOutputFile, sizeof(poutput_file) - 1);
00419 
00420     change_file_suffix(poutput_file, psuffix);
00421     if (strcmp(pInputFile, poutput_file) != 0)
00422       rename_file(pInputFile, poutput_file);
00423   }
00424 
00425   return true;
00426 }
00427 
00428 bool process_list(
00429   const char *pListFile,
00430   const char *pOutputDir,
00431   const char *pTransSuffix,
00432   bool moveInputFile,
00433   SOfflineSpeechRecognizer3I *pSpeechRec)
00434 {
00435   SFileListI *pfile_list = static_cast<SFileListI *>(BSAPICreateInstance(SIID_FILELIST));
00436   if (!pfile_list)
00437   {
00438     fprintf(stderr, "ERROR: Memory allocation error.\n");
00439     return false;
00440   }
00441 
00442   pfile_list->SetErrorHandler(&gErrorHandler);
00443   pfile_list->SetVerbose(false);
00444   pfile_list->SetTarget(0);
00445   pfile_list->Clear();
00446 
00447   if (!pfile_list->AddList(pListFile))
00448   {
00449     pfile_list->Release();
00450     return false;
00451   }
00452 
00453   char ptarget[1024];
00454   char psource[1024];
00455   int start;
00456   int end;
00457   float prob;
00458 
00459   bool result = true;
00460   pfile_list->FirstLine();
00461   while (pfile_list->GetLine(ptarget, psource, &start, &end, &prob))
00462   {
00463     result &= process_file(psource, ptarget, pOutputDir, pTransSuffix, moveInputFile, pSpeechRec);
00464   }
00465 
00466   pfile_list->Release();
00467   return result;
00468 }
00469 
00470 bool process_dir(const char *pInputDir, const char *pSuffixes, bool asyncMode)
00471 {
00472   SFileSnifferI *psniffer = static_cast<SFileSnifferI *>(BSAPICreateInstance(SIID_FILESNIFFER));
00473   if (!psniffer)
00474   {
00475     fprintf(stderr, "ERROR: Memory allocation error.\n");
00476     return false;
00477   }
00478 
00479   psniffer->SetErrorHandler(&gErrorHandler);
00480   psniffer->SetTarget(&gFileSnifferHandler);
00481   psniffer->SetUseLocking(asyncMode);
00482 
00483   if (!psniffer->AddDirectory(pInputDir))
00484   {
00485     psniffer->Release();
00486     return false;
00487   }
00488 
00489   char psuffixes[1024];
00490   psuffixes[sizeof(psuffixes) - 1] = '\0';
00491   strncpy(psuffixes, pSuffixes, sizeof(psuffixes) - 1);
00492   char *psuffix = strtok(psuffixes, ",; ");
00493   while (psuffix)
00494   {
00495     psniffer->AddWantedSuffix(psuffix);
00496     psuffix = strtok(0, ",; ");
00497   }
00498 
00499   if (asyncMode)
00500   {
00501     if (!psniffer->ProcessFilesAsync())
00502     {
00503       psniffer->Release();
00504       return false;
00505     }
00506 
00507     getchar();
00508     psniffer->Terminate(); // stop scan, blocks until processing terminates
00509   }
00510   else
00511   {
00512     psniffer->ProcessFiles();
00513   }
00514 
00515   psniffer->Release();
00516   return gFileSnifferHandler.GetResult();
00517 }
00518 
00519 int main(int argc, char *pArgs[])
00520 {
00521   const char *pinput_file   = 0;
00522   const char *pinput_dir    = 0;
00523   const char *poutput_file  = 0;
00524   const char *poutput_dir   = 0;
00525   const char *plist_file    = 0;
00526   const char *pconfig_file  = 0;
00527   const char *pwave_fmt     = "lin16";
00528   const char *psuffixes     = "wav";
00529   const char *pone_best_suffix = "trn";
00530   const char *plattice_suffix  = "lat";
00531   const char *pcn_suffix       = "cn";
00532   const char *plog_suffix   = "log";
00533   const char *pin_data_type     = SID_DEF_IN_DATA_TYPE;
00534   const char *pfea_fmt          = SID_DEF_FEA_FMT;
00535   bool auto_dir_scan_mode   = false;
00536   bool move_input_to_ouput  = false;
00537   const char *poutput_format= "str";
00538   unsigned int output_types = 0;
00539   const char *pone_best_output_format = "TXT";
00540   const char *plattice_output_format  = "NWPSELAl";
00541   const char *pcn_output_format       = "TWPSE";
00542   const char *ptimet_format = "default";
00543   int max_parallel_words    = 0;
00544   float min_word_log_post   = -70.0f;
00545 
00546   // command line parsing
00547   if(argc == 1)
00548   {
00549     help(pArgs[0]);
00550     return 0;
00551   }
00552 
00553   optind = 0;
00554   while (1)
00555   {
00556     int c = getopt(argc, pArgs, const_cast<char *>("-i:o:r:e:af:b:h:k:n:p:t:w:d:l:c:s:y:z:mgx:j:u:v"));
00557     if(c == -1)
00558       break;
00559 
00560     switch(c)
00561     {
00562       case 'i':
00563         pinput_file = optarg;
00564         break;
00565       case 'o':
00566         poutput_file = optarg;
00567         break;
00568       case 'r':
00569         pinput_dir = optarg;
00570         break;
00571       case 'e':
00572         psuffixes = optarg;
00573         break;
00574       case 'a':
00575         auto_dir_scan_mode = true;
00576         break;
00577       case 'f':
00578         poutput_format = optarg;
00579         break;
00580       case 'b':
00581         pone_best_output_format = optarg;
00582         break;
00583       case 'h':
00584         plattice_output_format = optarg;
00585         break;
00586       case 'k':
00587         pcn_output_format = optarg;
00588         break;
00589       case 'n':
00590         if(sscanf(optarg, "%d", &max_parallel_words) != 1 || max_parallel_words < 0)
00591         {
00592           fprintf(stderr, "ERROR: Invalid maximal number of parallel words in CN '%s'. "
00593                           "Must be positive integer or 0.\n", optarg);
00594           return 1;
00595         }
00596         break;
00597       case 'p':
00598         if(sscanf(optarg, "%f", &min_word_log_post) != 1 || min_word_log_post > 0)
00599         {
00600           fprintf(stderr, "ERROR: Invalid minimal posterior probability of word in log for CN '%s'. "
00601                           "Must be negative floating point number.\n", optarg);
00602           return 1;
00603         }
00604         break;
00605       case 't':
00606         ptimet_format = optarg;
00607         break;
00608       case 'w':
00609         pwave_fmt = optarg;
00610         break;
00611       case 'd':
00612         poutput_dir = optarg;
00613         break;
00614       case 'l':
00615         plist_file = optarg;
00616         break;
00617       case 'c':
00618         pconfig_file = optarg;
00619         break;
00620       case 's':
00621         pone_best_suffix = optarg;
00622         break;
00623       case 'y':
00624         plattice_suffix = optarg;
00625         break;
00626       case 'z':
00627         pcn_suffix = optarg;
00628         break;
00629       case 'm':
00630         move_input_to_ouput = true;
00631         break;
00632       case 'g':
00633         gStateHandler.SetSaveLog(true);
00634         break;
00635       case 'x':
00636         plog_suffix = optarg;
00637         break;
00638       case 'j':
00639         pin_data_type = optarg;
00640         break;
00641       case 'u':
00642         pfea_fmt = optarg;
00643         break;
00644       case 'v':
00645         gErrorHandler.SetVerbose(true);
00646         break;
00647       case '?':
00648         fprintf(stderr, "ERROR: Command line parsing error.\n");
00649         return 1;
00650       default :
00651         fprintf(stderr, "ERROR: Command line parsing error. Unexpected argument '%s'.\n", optarg);
00652         return 1;
00653     }
00654   }
00655 
00656   if (!pconfig_file)
00657   {
00658     fprintf(stderr, "ERROR: Config file (-c) must be specified.\n");
00659     return 1;
00660   }
00661 
00662   if (!pinput_file && !plist_file && !pinput_dir)
00663   {
00664     fprintf(stderr, "ERROR: Either input file (-i), list of files (-l) or directory (-r) must be specified.\n");
00665     return 1;
00666   }
00667 
00668   SLicenseManagerI *plicman = BSAPIGetLicenseManager();
00669   if (plicman)
00670   {
00671     plicman->SetErrorHandler(&gErrorHandler);
00672     plicman->RegisterLicenseFile("license.dat");
00673   }
00674   
00675   SOfflineSpeechRecognizer3I *psrec = static_cast<SOfflineSpeechRecognizer3I *>(BSAPICreateInstance(SIID_OFFLINESREC3));
00676   if (!psrec)
00677   {
00678     return 1;
00679   }
00680 
00681   psrec->SetErrorHandler(&gErrorHandler);
00682 
00683   if (!psrec->Init(pconfig_file))
00684   {
00685     psrec->Release();
00686     return 1;
00687   }
00688 
00689   gStateHandler.SetLogSuffix(plog_suffix);
00690   psrec->SetStateHandler(&gStateHandler);
00691 
00692   char pout_format[128];
00693   snprintf(pout_format, sizeof(pout_format) -1 , ",%s,", poutput_format);
00694   pout_format[sizeof(pout_format) - 1] = '\0';
00695 
00696   if(strstr(pout_format, ",str,"))
00697   {
00698     output_types |= OOT_ONEBEST;
00699   }
00700 
00701   if(strstr(pout_format, ",lat,"))
00702   {
00703     output_types |= OOT_LATTICE;
00704   }
00705 
00706   if(strstr(pout_format, ",cn,"))
00707   {
00708     output_types |= OOT_CONFUSIONNETWORK;
00709   }
00710 
00711   psrec->SetOutputTypes(output_types);
00712 
00713   psrec->SetOneBestOutputFormat(pone_best_output_format);
00714   psrec->SetLatticeOutputFormat(plattice_output_format);
00715   psrec->SetConfusionNetworkOutputFormat(pcn_output_format);
00716 
00717   psrec->SetOutputOneBestFileSuffix(pone_best_suffix);
00718   psrec->SetOutputLatticeFileSuffix(plattice_suffix);
00719   psrec->SetOutputConfusionNetworkFileSuffix(pcn_suffix);
00720 
00721   psrec->SetTimeFormat(ptimet_format);
00722 
00723   SBlockSetI *pbset = psrec->GetBlockSet();
00724   if (!psrec)
00725   {
00726     psrec->Release();
00727     return 1;
00728   }
00729 
00730   // configure the waveform source
00731   SWaveformFormatConvertorI *pwc = static_cast<SWaveformFormatConvertorI *>(pbset->GetBlock("waveform_convertor"));
00732   if (!pwc)
00733   {
00734     psrec->Release();
00735     return 1;
00736   }
00737 
00738   pwc->SetInputFormatStr(pwave_fmt);
00739 
00740   // configure maximal number of parallel words in CN
00741   SFSTToConfusionNetworkConvertorI *pfst_cn = static_cast<SFSTToConfusionNetworkConvertorI *>(pbset->GetBlock("fst_cn_convertor"));
00742   if (!pfst_cn)
00743   {
00744     psrec->Release();
00745     return 1;
00746   }
00747 
00748   pfst_cn->SetMaxParallelWords(max_parallel_words);
00749   pfst_cn->SetMinWordLogPosteriorProbability(min_word_log_post);
00750 
00751   if(pbset->Exists("feature_source"))
00752   {
00753     SFeatureSourceI *pfeature_source = static_cast<SFeatureSourceI *>(pbset->GetBlock("feature_source"));
00754     if(!pfeature_source)
00755     {
00756       psrec->Release();
00757       return 1;
00758     }
00759 
00760     if (!pfeature_source->SetFileFormatStr(pfea_fmt))
00761     {
00762       psrec->Release();
00763       return 1;
00764     }
00765   }
00766 
00767   if (poutput_dir)
00768     psrec->SetOutputDirectory(poutput_dir);
00769 
00770   if(!psrec->SetInputDataTypeStr(pin_data_type))
00771   {
00772     psrec->Release();
00773     return 1;
00774   }
00775 
00776   bool result = true;
00777   if (pinput_file)
00778     result = process_file(pinput_file, poutput_file, poutput_dir, "", move_input_to_ouput, psrec);
00779 
00780   if (plist_file)
00781     result = process_list(plist_file, poutput_dir, "", move_input_to_ouput, psrec);
00782 
00783   if (pinput_dir)
00784   {
00785     gFileSnifferHandler.SetOutputDir(poutput_dir);
00786     gFileSnifferHandler.SetTransSuffix("");
00787     gFileSnifferHandler.SetMoveInputToOutput(move_input_to_ouput);
00788     gFileSnifferHandler.SetSpeechRec(psrec);
00789     result = process_dir(pinput_dir, psuffixes, auto_dir_scan_mode);
00790   }
00791 
00792   psrec->Release();
00793   return (result ? 0 : 1);
00794 }

Generated on Wed Apr 11 10:00:17 2012 for BSAPI by  doxygen 1.4.7