00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "bsapi.h"
00012 #include <stdio.h>
00013 #include <string.h>
00014 #include <stdarg.h>
00015 #include <time.h>
00016
00017 #include "stimer.h"
00018 #include "getopt.h"
00019
00020 #if _MSC_VER
00021 #define snprintf _snprintf
00022 #endif
00023
00024
00025 #define SID_DEF_IN_DATA_TYPE "waveform"
00026 #define SID_DEF_FEA_FMT "htk"
00027
00028 static void change_file_path(char *pFileName, const char *pNewPath);
00029 static void change_file_suffix(char *pFileName, const char *pNewSuffix);
00030 static void get_file_path(const char *pFileName, char *pRetPath);
00031 static void get_file_suffix(const char *pFileName, char *pRetSuffix);
00032 static bool rename_file(const char *pOldFile, const char *pNewFile);
00033 static void change_file_suffix(char *pFileName, const char *pNewSuffix);
00034
00035 static bool process_file(
00036 const char *pInputFile,
00037 const char *pOutputFile,
00038 const char *pOutputDir,
00039 const char *pTransSuffix,
00040 bool moveInputFile,
00041 SOfflineSpeechRecognizer3I *pSpeechRec);
00042
00043 class ErrorHandler : public SErrorCallbackI
00044 {
00045 public:
00046 ErrorHandler() : mVerbose(false) {}
00047
00048 virtual void BSAPI_METHOD OnTextMessage(SUnknownI *pSender, message_type type, unsigned int messageId, const char *pMessage)
00049 {
00050 unsigned int iid = pSender ? pSender->GetIID() : SIID_UNDEFINED;
00051 switch(type)
00052 {
00053 case mtError:
00054 fprintf(stderr, "ERROR: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00055 break;
00056 case mtWarning:
00057 fprintf(stderr, "WARNING: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00058 break;
00059 case mtLog:
00060 LogMessage(pMessage);
00061 break;
00062 }
00063 }
00064
00065 void LogMessage(const char *pMessage, ...)
00066 {
00067 if (mVerbose)
00068 {
00069 va_list ap;
00070 va_start(ap, pMessage);
00071 vfprintf(stderr, pMessage, ap);
00072 fprintf(stderr, "\n");
00073 va_end(ap);
00074 }
00075 }
00076
00077 void SetVerbose(bool verbose) {mVerbose = verbose;}
00078
00079 protected:
00080 bool mVerbose;
00081 } gErrorHandler;
00082
00083 class SStateHandler : public SOfflineSpeechRecCallbackI
00084 {
00085 public:
00086 SStateHandler() : mSaveLog(false), mpLogSuffix(0), mStartTime(-1), mStopTime(-1) {}
00087
00088 bool BSAPI_METHOD OnFileStarted(const char *pInputFile, const char *pOutputFile)
00089 {
00090 gErrorHandler.LogMessage("Processing file '%s'", pInputFile);
00091 time(&mStartTime);
00092 mProcessingTimer.Start();
00093 return true;
00094 }
00095
00096 bool BSAPI_METHOD OnSegmentStarted(long_long segmentStart, long_long segmentEnd, int channel, const char *pTranscriptionFile)
00097 {
00098 gErrorHandler.LogMessage(" segment [%d, %d]", static_cast<int>(segmentStart / 100000), static_cast<int>(segmentEnd / 100000));
00099 return true;
00100 }
00101
00102 bool BSAPI_METHOD OnTranscription(SUnknownI *pSender, STranscriptionI *pTrans)
00103 {
00104 return true;
00105 }
00106
00107 bool BSAPI_METHOD OnSegmentDone(long_long segmentStart, long_long segmentEnd, int channel, const char *pTranscriptionFile, bool isValid)
00108 {
00109 return true;
00110 }
00111
00112 bool BSAPI_METHOD OnFileDone(const char *pInputFile, const char *pOutputFile)
00113 {
00114 mProcessingTimer.Stop();
00115 time(&mStopTime);
00116
00117 if (mSaveLog)
00118 SaveLog(pInputFile, pOutputFile);
00119
00120 return true;
00121 }
00122
00123 void SetSaveLog(bool saveLog) {mSaveLog = saveLog;}
00124 void SetLogSuffix(const char *pLogSuffix) {mpLogSuffix = pLogSuffix;}
00125
00126 protected:
00127 bool SaveLog(const char *pInputFile, const char *pOutputFile)
00128 {
00129 char plog_file_name[1024];
00130 plog_file_name[sizeof(plog_file_name) - 1] = '\0';
00131 strncpy(plog_file_name, pOutputFile, sizeof(plog_file_name) - 1);
00132 change_file_suffix(plog_file_name, mpLogSuffix);
00133
00134 FILE *plog_file = fopen(plog_file_name, "a");
00135 if (!plog_file)
00136 {
00137 fprintf(stderr, "ERROR: Can not open log file '%s'.\n", plog_file_name);
00138 return false;
00139 }
00140
00141 if (ftell(plog_file) != 0)
00142 fputc('\n', plog_file);
00143
00144 fprintf(plog_file, "[lvcsr_software]\n");
00145 fprintf(plog_file, "bsapi_release=%s\n", BSAPIVersion());
00146
00147 fputc('\n', plog_file);
00148 fprintf(plog_file, "[processing_info]\n");
00149 fprintf(plog_file, "input_file=%s\n", pInputFile);
00150 fprintf(plog_file, "output_file=%s\n", pOutputFile);
00151 fprintf(plog_file, "processing_start=%s", ctime(&mStartTime));
00152 fprintf(plog_file, "processing_stop=%s", ctime(&mStopTime));
00153 fprintf(plog_file, "duration_sec=%.3f\n", mProcessingTimer.GetInterval());
00154
00155 fclose(plog_file);
00156 return true;
00157 }
00158
00159 protected:
00160 bool mSaveLog;
00161 const char *mpLogSuffix;
00162 time_t mStartTime;
00163 time_t mStopTime;
00164 SSimpleTimer mProcessingTimer;
00165
00166 } gStateHandler;
00167
00168 class SFileSnifferCallback : public SFileSnifferCallbackI
00169 {
00170 public:
00171 SFileSnifferCallback() :
00172 mpSpeechRec(0),
00173 mpOutputDir(0),
00174 mpTransSuffix(0),
00175 mMoveInputToOutput(0),
00176 mResult(true)
00177 {
00178 }
00179
00180 ~SFileSnifferCallback() {}
00181
00182 bool BSAPI_METHOD OnFile(const char *pFile)
00183 {
00184 mResult &= process_file(pFile, 0, mpOutputDir, mpTransSuffix, mMoveInputToOutput, mpSpeechRec);
00185 return true;
00186 }
00187
00188 bool BSAPI_METHOD OnStateChanged(state_type state)
00189 {
00190 switch (state)
00191 {
00192 case stLoopStarted: mResult = true; break;
00193 case stWaitForData: printf("Waiting for input files... Press ENTER to stop.\n"); break;
00194 case stTerminated: break;
00195 }
00196 return true;
00197 }
00198
00199 void SetSpeechRec(SOfflineSpeechRecognizer3I *pSpeechRec) {mpSpeechRec = pSpeechRec;}
00200 void SetOutputDir(const char *pOutputDir) {mpOutputDir = pOutputDir;}
00201 void SetTransSuffix(const char *pTransSuffix) {mpTransSuffix = pTransSuffix;}
00202 void SetMoveInputToOutput(bool moveInputToOutput) {mMoveInputToOutput = moveInputToOutput;}
00203 bool GetResult() {return mResult;}
00204
00205 protected:
00206 SOfflineSpeechRecognizer3I *mpSpeechRec;
00207 const char *mpOutputDir;
00208 const char *mpTransSuffix;
00209 bool mMoveInputToOutput;
00210 bool mResult;
00211
00212 } gFileSnifferHandler;
00213
00214
00215 void help(const char *pProgName)
00216 {
00217 puts("\n Offline speech recognizer ");
00218 printf(" %s\n", BSAPIVersion());
00219 puts(" ======================================================================= ");
00220 puts(" ");
00221 printf(" USAGE: %s [options]\n", pProgName);
00222 puts(" ");
00223 puts(" system configuration: ");
00224 puts(" -c file config file ");
00225 puts(" ");
00226 puts(" input from single file: ");
00227 puts(" -i file input waveform or features file ");
00228 puts(" -j str [waveform] input data type (waveform, features) ");
00229 puts(" -o file output transcription file ");
00230 puts(" ");
00231 puts(" input data type = features: ");
00232 puts(" -u fmt [htk] feature format (ascii, binary, htk) ");
00233 puts(" ");
00234 puts(" input from list file: ");
00235 puts(" -l file list of input [and output files] ");
00236 puts(" ");
00237 puts(" input from directory: ");
00238 puts(" -r dir input directory ");
00239 puts(" -e str [wav] extensions of audio files (comma separated) ");
00240 puts(" -a input directory auto-scan mode ");
00241 puts(" ");
00242 puts(" output specification: ");
00243 puts(" -f type list of output types (comma separated) ");
00244 puts(" str - one best string ");
00245 puts(" lat - lattice (graph with alternative sentences) ");
00246 puts(" cn - confusion network (graph with alternative words)");
00247 puts(" -b flags format flags for transctiption strings ");
00248 puts(" TXT:SEn use pure text format ");
00249 puts(" S,E include start/end time ");
00250 puts(" n deletes the auxiliary symbols ");
00251 puts(" LBF:PSEl use HTK label format ");
00252 puts(" S,E include start/end time ");
00253 puts(" l include word likelihood ");
00254 puts(" P include word confidence ");
00255 puts(" FST:NWPSEl use graph format ");
00256 puts(" N include graph nodes ");
00257 puts(" W include word identification ");
00258 puts(" S,E include start/end time ");
00259 puts(" l include word likelihood ");
00260 puts(" P include word confidence ");
00261 puts(" -h flags format flags for lattices NWPpSELAl ");
00262 puts(" N include graph nodes ");
00263 puts(" W include word identification ");
00264 puts(" P include word posterior probability ");
00265 puts(" p include word posterior probability in log ");
00266 puts(" S,E include start/end time ");
00267 puts(" A include word acoustic likelihood ");
00268 puts(" L include word language likelihood ");
00269 puts(" l include word total likelihood ");
00270 puts(" -k flags format flags for confusion networks TNWPpSE ");
00271 puts(" T include time slot ");
00272 puts(" N include graph nodes ");
00273 puts(" W include word identification ");
00274 puts(" P include word posterior probability ");
00275 puts(" p include word posterior probability in log ");
00276 puts(" S,E include start/end time ");
00277 puts(" -n num [0] maximal number of parallel words in CN (0 means inf) ");
00278 puts(" -p num [-70] minimal posterior probability of word in log for CN ");
00279 puts(" -t str [default] time format (htk, frames, s, ms, hms, dhms, default) ");
00280 puts(" htk - time in htk time units = 100 nanoseconds ");
00281 puts(" frames - time in number of frames = 10 milliseconds");
00282 puts(" s - time in seconds ");
00283 puts(" ms - time in format minutes:seconds ");
00284 puts(" hms - hours:minutes:seconds ");
00285 puts(" dhms - days hours:minutes:seconds ");
00286 puts(" default - htk for str LBF output type, s for others");
00287 puts(" -s str [trn] one best string file suffix ");
00288 puts(" -y str [lat] lattice file suffix ");
00289 puts(" -z str [cn] confusion network file suffix ");
00290 puts(" ");
00291 puts(" common options: ");
00292 puts(" -w fmt [lin16] waveform format (lin16, lin8, alaw, mulaw) ");
00293 puts(" -d dir output directory ");
00294 puts(" -m move input waveform file to output directory ");
00295 puts(" -g save log file ");
00296 puts(" -x str [log] log file suffix ");
00297 puts(" -v verbose mode ");
00298 puts(" ");
00299 }
00300
00301
/**
 * Replaces the directory part of a file name in place.
 *
 * Everything before the last '/' or '\\' is replaced by pNewPath; the
 * separator itself and the base name after it are kept. A name without any
 * separator is left unchanged. Null arguments are ignored.
 *
 * @param pFileName file name to modify; the caller must provide room for
 *                  strlen(pNewPath) + length of the kept tail + 1 bytes
 * @param pNewPath  new directory, without a trailing separator
 */
void change_file_path(char *pFileName, const char *pNewPath)
{
  if (!pFileName || !pNewPath)
    return;

  /* Find the last path separator of either kind in a single pass. */
  char *psep_pos = NULL;
  for (char *pch = pFileName; *pch != '\0'; ++pch)
  {
    if (*pch == '/' || *pch == '\\')
      psep_pos = pch;
  }

  if (!psep_pos)
    return;

  /* Shift the "<separator><base name>" tail (with its terminator) to its
   * final position, then put the new directory in front of it. memmove is
   * required because source and destination overlap; working in place
   * avoids any fixed-size temporary buffer. */
  const size_t path_len = strlen(pNewPath);
  const size_t tail_len = strlen(psep_pos) + 1;
  memmove(pFileName + path_len, psep_pos, tail_len);
  memcpy(pFileName, pNewPath, path_len);
}
00317
/**
 * Replaces the suffix (the text after the last '.') of a file name in place.
 *
 * When the last path component has no '.', a dot and the new suffix are
 * appended instead. Null arguments are ignored.
 *
 * @param pFileName  file name to modify; the caller must provide room for
 *                   the resulting name
 * @param pNewSuffix new suffix without the leading dot
 */
void change_file_suffix(char *pFileName, const char *pNewSuffix)
{
  if (!pFileName || !pNewSuffix)
    return;

  char *pdot_pos = strrchr(pFileName, '.');

  /* The dot starts a suffix only when no path separator follows it;
   * otherwise it belongs to a directory name such as "dir.d/file". */
  if (pdot_pos && strpbrk(pdot_pos, "/\\") == NULL)
  {
    strcpy(pdot_pos + 1, pNewSuffix);
  }
  else
  {
    strcat(pFileName, ".");
    strcat(pFileName, pNewSuffix);
  }
}
00335
/**
 * Removes the suffix, including its dot, from a file name in place.
 *
 * Only a '.' in the last path component counts; a name whose last component
 * has no dot is left unchanged. A null argument is ignored.
 *
 * @param pFileName file name to truncate
 */
void cutof_file_suffix(char *pFileName)
{
  if (!pFileName)
    return;

  char *dot_pos = strrchr(pFileName, '.');

  /* A separator after the dot means the dot is part of a directory name. */
  if (dot_pos && strpbrk(dot_pos, "/\\") == NULL)
    *dot_pos = '\0';
}
00345
/**
 * Extracts the directory part of a file name.
 *
 * The result is everything before the last '/' or '\\', without that
 * separator. It is an empty string when the name contains no separator or
 * when pFileName is null.
 *
 * @param pFileName file name to inspect
 * @param pRetPath  receives the directory; must have room for the directory
 *                  part plus the terminator. A null pointer is ignored.
 */
void get_file_path(const char *pFileName, char *pRetPath)
{
  if (!pRetPath)
    return;

  /* Length of the directory part = index of the last separator (0 if none). */
  size_t dir_len = 0;
  if (pFileName)
  {
    for (size_t i = 0; pFileName[i] != '\0'; ++i)
    {
      if (pFileName[i] == '/' || pFileName[i] == '\\')
        dir_len = i;
    }
  }

  /* Copy only the directory prefix rather than the whole name. */
  if (dir_len > 0)
    memmove(pRetPath, pFileName, dir_len);
  pRetPath[dir_len] = '\0';
}
00359
/**
 * Extracts the suffix (the text after the last '.') of a file name.
 *
 * The result is an empty string when the last path component contains no
 * dot or when pFileName is null.
 *
 * @param pFileName  file name to inspect
 * @param pRetSuffix receives the suffix without the dot; must have room for
 *                   it plus the terminator. A null pointer is ignored.
 */
void get_file_suffix(const char *pFileName, char *pRetSuffix)
{
  if (!pRetSuffix)
    return;

  *pRetSuffix = '\0';
  if (!pFileName)
    return;

  const char *pdot_pos = strrchr(pFileName, '.');

  /* A separator after the dot means the dot is part of a directory name. */
  if (pdot_pos && strpbrk(pdot_pos, "/\\") == NULL)
    strcpy(pRetSuffix, pdot_pos + 1);
}
00371
/**
 * Renames (moves) a file using the standard rename() call.
 *
 * @return true when rename() reported success (returned 0), false otherwise
 */
bool rename_file(const char *pOldFile, const char *pNewFile)
{
  const int status = rename(pOldFile, pNewFile);
  return status == 0;
}
00376
00377 bool process_file(
00378 const char *pInputFile,
00379 const char *pOutputFile,
00380 const char *pOutputDir,
00381 const char *pTransSuffix,
00382 bool moveInputFile,
00383 SOfflineSpeechRecognizer3I *pSpeechRec)
00384 {
00385 char poutput_file[1024];
00386 poutput_file[sizeof(poutput_file) - 1] = '\0';
00387
00388 if (!pOutputFile || strlen(pOutputFile) == 0)
00389 {
00390 strncpy(poutput_file, pInputFile, sizeof(poutput_file) - 1);
00391 if (pOutputDir)
00392 change_file_path(poutput_file, pOutputDir);
00393 if(strlen(pTransSuffix) == 0 || strcmp(pTransSuffix, "") == 0)
00394 cutof_file_suffix(poutput_file);
00395 else
00396 change_file_suffix(poutput_file, pTransSuffix);
00397 pOutputFile = poutput_file;
00398 }
00399
00400 if (!pOutputDir)
00401 {
00402 char poutput_dir[1024];
00403 poutput_dir[sizeof(poutput_dir) - 1] = '\0';
00404 get_file_path(pOutputFile, poutput_dir);
00405 pSpeechRec->SetOutputDirectory(poutput_dir);
00406 }
00407
00408 if (!pSpeechRec->ProcessFile(pInputFile, pOutputFile))
00409 return false;
00410
00411 if (moveInputFile)
00412 {
00413 char psuffix[1024];
00414 psuffix[sizeof(psuffix) - 1] = '\0';
00415 get_file_suffix(pInputFile, psuffix);
00416
00417 if (poutput_file != pOutputFile)
00418 strncpy(poutput_file, pOutputFile, sizeof(poutput_file) - 1);
00419
00420 change_file_suffix(poutput_file, psuffix);
00421 if (strcmp(pInputFile, poutput_file) != 0)
00422 rename_file(pInputFile, poutput_file);
00423 }
00424
00425 return true;
00426 }
00427
00428 bool process_list(
00429 const char *pListFile,
00430 const char *pOutputDir,
00431 const char *pTransSuffix,
00432 bool moveInputFile,
00433 SOfflineSpeechRecognizer3I *pSpeechRec)
00434 {
00435 SFileListI *pfile_list = static_cast<SFileListI *>(BSAPICreateInstance(SIID_FILELIST));
00436 if (!pfile_list)
00437 {
00438 fprintf(stderr, "ERROR: Memory allocation error.\n");
00439 return false;
00440 }
00441
00442 pfile_list->SetErrorHandler(&gErrorHandler);
00443 pfile_list->SetVerbose(false);
00444 pfile_list->SetTarget(0);
00445 pfile_list->Clear();
00446
00447 if (!pfile_list->AddList(pListFile))
00448 {
00449 pfile_list->Release();
00450 return false;
00451 }
00452
00453 char ptarget[1024];
00454 char psource[1024];
00455 int start;
00456 int end;
00457 float prob;
00458
00459 bool result = true;
00460 pfile_list->FirstLine();
00461 while (pfile_list->GetLine(ptarget, psource, &start, &end, &prob))
00462 {
00463 result &= process_file(psource, ptarget, pOutputDir, pTransSuffix, moveInputFile, pSpeechRec);
00464 }
00465
00466 pfile_list->Release();
00467 return result;
00468 }
00469
00470 bool process_dir(const char *pInputDir, const char *pSuffixes, bool asyncMode)
00471 {
00472 SFileSnifferI *psniffer = static_cast<SFileSnifferI *>(BSAPICreateInstance(SIID_FILESNIFFER));
00473 if (!psniffer)
00474 {
00475 fprintf(stderr, "ERROR: Memory allocation error.\n");
00476 return false;
00477 }
00478
00479 psniffer->SetErrorHandler(&gErrorHandler);
00480 psniffer->SetTarget(&gFileSnifferHandler);
00481 psniffer->SetUseLocking(asyncMode);
00482
00483 if (!psniffer->AddDirectory(pInputDir))
00484 {
00485 psniffer->Release();
00486 return false;
00487 }
00488
00489 char psuffixes[1024];
00490 psuffixes[sizeof(psuffixes) - 1] = '\0';
00491 strncpy(psuffixes, pSuffixes, sizeof(psuffixes) - 1);
00492 char *psuffix = strtok(psuffixes, ",; ");
00493 while (psuffix)
00494 {
00495 psniffer->AddWantedSuffix(psuffix);
00496 psuffix = strtok(0, ",; ");
00497 }
00498
00499 if (asyncMode)
00500 {
00501 if (!psniffer->ProcessFilesAsync())
00502 {
00503 psniffer->Release();
00504 return false;
00505 }
00506
00507 getchar();
00508 psniffer->Terminate();
00509 }
00510 else
00511 {
00512 psniffer->ProcessFiles();
00513 }
00514
00515 psniffer->Release();
00516 return gFileSnifferHandler.GetResult();
00517 }
00518
00519 int main(int argc, char *pArgs[])
00520 {
00521 const char *pinput_file = 0;
00522 const char *pinput_dir = 0;
00523 const char *poutput_file = 0;
00524 const char *poutput_dir = 0;
00525 const char *plist_file = 0;
00526 const char *pconfig_file = 0;
00527 const char *pwave_fmt = "lin16";
00528 const char *psuffixes = "wav";
00529 const char *pone_best_suffix = "trn";
00530 const char *plattice_suffix = "lat";
00531 const char *pcn_suffix = "cn";
00532 const char *plog_suffix = "log";
00533 const char *pin_data_type = SID_DEF_IN_DATA_TYPE;
00534 const char *pfea_fmt = SID_DEF_FEA_FMT;
00535 bool auto_dir_scan_mode = false;
00536 bool move_input_to_ouput = false;
00537 const char *poutput_format= "str";
00538 unsigned int output_types = 0;
00539 const char *pone_best_output_format = "TXT";
00540 const char *plattice_output_format = "NWPSELAl";
00541 const char *pcn_output_format = "TWPSE";
00542 const char *ptimet_format = "default";
00543 int max_parallel_words = 0;
00544 float min_word_log_post = -70.0f;
00545
00546
00547 if(argc == 1)
00548 {
00549 help(pArgs[0]);
00550 return 0;
00551 }
00552
00553 optind = 0;
00554 while (1)
00555 {
00556 int c = getopt(argc, pArgs, const_cast<char *>("-i:o:r:e:af:b:h:k:n:p:t:w:d:l:c:s:y:z:mgx:j:u:v"));
00557 if(c == -1)
00558 break;
00559
00560 switch(c)
00561 {
00562 case 'i':
00563 pinput_file = optarg;
00564 break;
00565 case 'o':
00566 poutput_file = optarg;
00567 break;
00568 case 'r':
00569 pinput_dir = optarg;
00570 break;
00571 case 'e':
00572 psuffixes = optarg;
00573 break;
00574 case 'a':
00575 auto_dir_scan_mode = true;
00576 break;
00577 case 'f':
00578 poutput_format = optarg;
00579 break;
00580 case 'b':
00581 pone_best_output_format = optarg;
00582 break;
00583 case 'h':
00584 plattice_output_format = optarg;
00585 break;
00586 case 'k':
00587 pcn_output_format = optarg;
00588 break;
00589 case 'n':
00590 if(sscanf(optarg, "%d", &max_parallel_words) != 1 || max_parallel_words < 0)
00591 {
00592 fprintf(stderr, "ERROR: Invalid maximal number of parallel words in CN '%s'. "
00593 "Must be positive integer or 0.\n", optarg);
00594 return 1;
00595 }
00596 break;
00597 case 'p':
00598 if(sscanf(optarg, "%f", &min_word_log_post) != 1 || min_word_log_post > 0)
00599 {
00600 fprintf(stderr, "ERROR: Invalid minimal posterior probability of word in log for CN '%s'. "
00601 "Must be negative floating point number.\n", optarg);
00602 return 1;
00603 }
00604 break;
00605 case 't':
00606 ptimet_format = optarg;
00607 break;
00608 case 'w':
00609 pwave_fmt = optarg;
00610 break;
00611 case 'd':
00612 poutput_dir = optarg;
00613 break;
00614 case 'l':
00615 plist_file = optarg;
00616 break;
00617 case 'c':
00618 pconfig_file = optarg;
00619 break;
00620 case 's':
00621 pone_best_suffix = optarg;
00622 break;
00623 case 'y':
00624 plattice_suffix = optarg;
00625 break;
00626 case 'z':
00627 pcn_suffix = optarg;
00628 break;
00629 case 'm':
00630 move_input_to_ouput = true;
00631 break;
00632 case 'g':
00633 gStateHandler.SetSaveLog(true);
00634 break;
00635 case 'x':
00636 plog_suffix = optarg;
00637 break;
00638 case 'j':
00639 pin_data_type = optarg;
00640 break;
00641 case 'u':
00642 pfea_fmt = optarg;
00643 break;
00644 case 'v':
00645 gErrorHandler.SetVerbose(true);
00646 break;
00647 case '?':
00648 fprintf(stderr, "ERROR: Command line parsing error.\n");
00649 return 1;
00650 default :
00651 fprintf(stderr, "ERROR: Command line parsing error. Unexpected argument '%s'.\n", optarg);
00652 return 1;
00653 }
00654 }
00655
00656 if (!pconfig_file)
00657 {
00658 fprintf(stderr, "ERROR: Config file (-c) must be specified.\n");
00659 return 1;
00660 }
00661
00662 if (!pinput_file && !plist_file && !pinput_dir)
00663 {
00664 fprintf(stderr, "ERROR: Either input file (-i), list of files (-l) or directory (-r) must be specified.\n");
00665 return 1;
00666 }
00667
00668 SLicenseManagerI *plicman = BSAPIGetLicenseManager();
00669 if (plicman)
00670 {
00671 plicman->SetErrorHandler(&gErrorHandler);
00672 plicman->RegisterLicenseFile("license.dat");
00673 }
00674
00675 SOfflineSpeechRecognizer3I *psrec = static_cast<SOfflineSpeechRecognizer3I *>(BSAPICreateInstance(SIID_OFFLINESREC3));
00676 if (!psrec)
00677 {
00678 return 1;
00679 }
00680
00681 psrec->SetErrorHandler(&gErrorHandler);
00682
00683 if (!psrec->Init(pconfig_file))
00684 {
00685 psrec->Release();
00686 return 1;
00687 }
00688
00689 gStateHandler.SetLogSuffix(plog_suffix);
00690 psrec->SetStateHandler(&gStateHandler);
00691
00692 char pout_format[128];
00693 snprintf(pout_format, sizeof(pout_format) -1 , ",%s,", poutput_format);
00694 pout_format[sizeof(pout_format) - 1] = '\0';
00695
00696 if(strstr(pout_format, ",str,"))
00697 {
00698 output_types |= OOT_ONEBEST;
00699 }
00700
00701 if(strstr(pout_format, ",lat,"))
00702 {
00703 output_types |= OOT_LATTICE;
00704 }
00705
00706 if(strstr(pout_format, ",cn,"))
00707 {
00708 output_types |= OOT_CONFUSIONNETWORK;
00709 }
00710
00711 psrec->SetOutputTypes(output_types);
00712
00713 psrec->SetOneBestOutputFormat(pone_best_output_format);
00714 psrec->SetLatticeOutputFormat(plattice_output_format);
00715 psrec->SetConfusionNetworkOutputFormat(pcn_output_format);
00716
00717 psrec->SetOutputOneBestFileSuffix(pone_best_suffix);
00718 psrec->SetOutputLatticeFileSuffix(plattice_suffix);
00719 psrec->SetOutputConfusionNetworkFileSuffix(pcn_suffix);
00720
00721 psrec->SetTimeFormat(ptimet_format);
00722
00723 SBlockSetI *pbset = psrec->GetBlockSet();
00724 if (!psrec)
00725 {
00726 psrec->Release();
00727 return 1;
00728 }
00729
00730
00731 SWaveformFormatConvertorI *pwc = static_cast<SWaveformFormatConvertorI *>(pbset->GetBlock("waveform_convertor"));
00732 if (!pwc)
00733 {
00734 psrec->Release();
00735 return 1;
00736 }
00737
00738 pwc->SetInputFormatStr(pwave_fmt);
00739
00740
00741 SFSTToConfusionNetworkConvertorI *pfst_cn = static_cast<SFSTToConfusionNetworkConvertorI *>(pbset->GetBlock("fst_cn_convertor"));
00742 if (!pfst_cn)
00743 {
00744 psrec->Release();
00745 return 1;
00746 }
00747
00748 pfst_cn->SetMaxParallelWords(max_parallel_words);
00749 pfst_cn->SetMinWordLogPosteriorProbability(min_word_log_post);
00750
00751 if(pbset->Exists("feature_source"))
00752 {
00753 SFeatureSourceI *pfeature_source = static_cast<SFeatureSourceI *>(pbset->GetBlock("feature_source"));
00754 if(!pfeature_source)
00755 {
00756 psrec->Release();
00757 return 1;
00758 }
00759
00760 if (!pfeature_source->SetFileFormatStr(pfea_fmt))
00761 {
00762 psrec->Release();
00763 return 1;
00764 }
00765 }
00766
00767 if (poutput_dir)
00768 psrec->SetOutputDirectory(poutput_dir);
00769
00770 if(!psrec->SetInputDataTypeStr(pin_data_type))
00771 {
00772 psrec->Release();
00773 return 1;
00774 }
00775
00776 bool result = true;
00777 if (pinput_file)
00778 result = process_file(pinput_file, poutput_file, poutput_dir, "", move_input_to_ouput, psrec);
00779
00780 if (plist_file)
00781 result = process_list(plist_file, poutput_dir, "", move_input_to_ouput, psrec);
00782
00783 if (pinput_dir)
00784 {
00785 gFileSnifferHandler.SetOutputDir(poutput_dir);
00786 gFileSnifferHandler.SetTransSuffix("");
00787 gFileSnifferHandler.SetMoveInputToOutput(move_input_to_ouput);
00788 gFileSnifferHandler.SetSpeechRec(psrec);
00789 result = process_dir(pinput_dir, psuffixes, auto_dir_scan_mode);
00790 }
00791
00792 psrec->Release();
00793 return (result ? 0 : 1);
00794 }