00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <stdio.h>
00011 #include <stdlib.h>
00012 #include <errno.h>
00013 #include <stdarg.h>
00014 #include <cstring>
00015 #include <string>
00016 #include <cassert>
00017
00018 #include "bsapi.h"
00019 #include "getopt.h"
00020 #include "labtarget.h"
00021
00022 #ifdef WIN32
00023 #define DIRSEP "\\"
00024 #else
00025 #define DIRSEP "/"
00026 #endif
00027
00028 #define KWS_LIC_SECS_TO_EXPIRE_WARN (10 * 24 * 3600)
00029
00030 SLabelTarget gHypTarget;
00031 SLabelTarget gDetTarget;
00032
00033
00034
00035
00036 class ErrorHandler : public SErrorCallbackI
00037 {
00038 public:
00039 ErrorHandler() : mVerbose(false) {;}
00040 virtual void BSAPI_METHOD OnTextMessage(SUnknownI *pSender, message_type type, unsigned int messageId, const char *pMessage)
00041 {
00042 unsigned int iid = pSender ? pSender->GetIID() : SIID_UNDEFINED;
00043 switch(type)
00044 {
00045 case mtError:
00046 fprintf(stderr, "ERROR: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00047 break;
00048 case mtWarning:
00049 fprintf(stderr, "WARNING: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00050 break;
00051 case mtLog:
00052 LogMessage(pMessage);
00053 break;
00054 }
00055 }
00056
00057 void LogMessage(const char *pMessage, ...)
00058 {
00059 if (mVerbose)
00060 {
00061 va_list ap;
00062 va_start(ap, pMessage);
00063 vfprintf(stdout, pMessage, ap);
00064 fprintf(stdout, "\n");
00065 va_end(ap);
00066 }
00067 }
00068
00069 void SetVerbose(bool verbose) {mVerbose = verbose;}
00070
00071 protected:
00072 bool mVerbose;
00073 } gErrorHandler;
00074
// Returns pFilename with its suffix replaced by pNewSuffix.
//
// The suffix is everything from the last '.' onwards, provided that dot lies
// after the last directory separator ('/' or '\\'). If the file name has no
// such dot, pNewSuffix is simply appended.
//
//   pFilename  - path to modify; must not be null.
//   pNewSuffix - new suffix including its leading dot (e.g. ".hyp");
//                must not be null.
//
// Example: ("dir.d/audio.wave", ".hyp") -> "dir.d/audio.hyp"
std::string change_file_suffix(const char *pFilename, const char *pNewSuffix)
{
    assert(pFilename);
    assert(pNewSuffix);

    std::string res = pFilename;
    const std::string::size_type pos_dot = res.rfind('.');
    const std::string::size_type pos_sep = res.find_last_of("/\\");

    // A dot inside a directory name (before the last separator) is not a
    // file suffix.
    const bool has_suffix = pos_dot != std::string::npos
                         && (pos_sep == std::string::npos || pos_dot > pos_sep);

    // Drop the whole old suffix, whatever its length, so that no tail of a
    // longer old suffix survives behind the new one.
    if (has_suffix)
        res.erase(pos_dot);

    res += pNewSuffix;
    return res;
}
00096
00097 void help()
00098 {
00099 puts("\n Keyword spotting ");
00100 printf(" %s\n", BSAPIVersion());
00101 puts(" ================================================================ ");
00102 puts(" ");
00103 puts(" USAGE: kws_cmd [options] ");
00104 puts(" ");
00105 puts(" system configuration: ");
00106 puts(" -c file configuration file ");
00107 puts(" ");
00108 puts(" processing one file: ");
00109 puts(" -i file input file ");
00110 puts(" -o file detection file ");
00111 puts(" -h file hypothesis file ");
00112 puts(" ");
00113 puts(" processing list of files: ");
00114 puts(" -l file list of input files ");
00115 puts(" -p file detection master file (for all input files) ");
00116 puts(" -q file hypothesis master file (for all input files) ");
00117 puts(" -o \"none\" will not produce detection file per each file ");
00118 puts(" -h \"none\" will not produce hypothesis file per each file");
00119 puts(" ");
00120 puts(" processing directory: ");
00121 puts(" -d dir input directory ");
00122 puts(" -e str [raw] extension of audio files ");
00123 puts(" -p file detection master file (for all input files) ");
00124 puts(" -q file hypothesis master file (for all input files) ");
00125 puts(" -o \"none\" will not produce detection file per each file ");
00126 puts(" -h \"none\" will not produce hypothesis file per each file");
00127 puts(" ");
00128 puts(" processing features instead of waveform: ");
00129 puts(" -f enable processing of features ");
00130 puts(" -a str [idx] extension of index file ");
00131 puts(" -b str [fea] extension of feature files ");
00132 puts(" ");
00133 puts(" NOTE: in case of -l or -d the output will be written ");
00134 puts(" to *.hyp and *.det where * is the audio filename ");
00135 puts(" without extension. Can be switched off by -o none, -h none ");
00136 puts(" ");
00137 puts(" other options: ");
00138 puts(" -v verbose mode ");
00139 puts(" -s fmt [mlf] time format (seconds, mlf) ");
00140 puts(" -y file list of keywords and pronunciations in XML ");
00141 puts(" -t file XSLT to be linked with the list of keywords ");
00142 puts(" -w fmt [lin16] waveform format (lin16, lin8, alaw, mulaw) ");
00143 puts(" -n num [1] number of channels in audio files, takes into ");
00144 puts(" account for non header files (.raw) ");
00145 puts(" -x file list of keywords in XML format ");
00146 puts(" -k file list of keywords in plain text format ");
00147 puts(" -g network_file load the kws net from this file instead of using netgen");
00148 puts(" -j keep output label unchanged (with pronunciation variant)");
00149 puts(" ");
00150 }
00151
00152 int main(int argc, char **argv)
00153 {
00154
00155 char *pconfig_file = 0;
00156 char *pinput_file = 0;
00157 char *plist_file = 0;
00158 char *pinput_dir = 0;
00159 const char *pwave_ext = "raw";
00160 const char *pwave_fmt = "lin16";
00161 char *pkwlxml_file = 0;
00162 char *pkwltxt_file = 0;
00163 char *pkwlxsl_file = 0;
00164 char *poutdet_file = 0;
00165 char *pouthyp_file = 0;
00166 char *poutdetm_file = 0;
00167 char *pouthypm_file = 0;
00168 const char *ptime_fmt = "mlf";
00169 char *poutkwl_file = 0;
00170 char *pnetwork_file = 0;
00171 int nchannels = 1;
00172 bool pronvariant_in_label = false;
00173 bool process_fea = false;
00174 const char *pidx_ext = "idx";
00175 const char *pfea_ext = "fea";
00176
00177
00178 if(argc == 1)
00179 {
00180 help();
00181 return 0;
00182 }
00183
00184 optind = 0;
00185 while (1)
00186 {
00187 int c = getopt(argc, argv, const_cast<char *>("-c:i:l:d:e:w:n:x:k:o:h:p:q:s:y:t:vg:jfa:b:"));
00188 if(c == -1)
00189 break;
00190
00191 switch(c)
00192 {
00193 case 'c':
00194 pconfig_file = optarg;
00195 break;
00196 case 'i':
00197 pinput_file = optarg;
00198 break;
00199 case 'l':
00200 plist_file = optarg;
00201 break;
00202 case 'd':
00203 pinput_dir = optarg;
00204 break;
00205 case 'e':
00206 pwave_ext = optarg;
00207 break;
00208 case 'w':
00209 pwave_fmt = optarg;
00210 break;
00211 case 'n':
00212 if(sscanf(optarg, "%d", &nchannels) != 1 || nchannels < 1)
00213 {
00214 fprintf(stderr, "ERROR: Invalid number of channels: %s.\n", optarg);
00215 return 1;
00216 }
00217 break;
00218 case 'x':
00219 pkwlxml_file = optarg;
00220 break;
00221 case 'k':
00222 pkwltxt_file = optarg;
00223 break;
00224 case 'o':
00225 poutdet_file = optarg;
00226 break;
00227 case 'h':
00228 pouthyp_file = optarg;
00229 break;
00230 case 'p':
00231 poutdetm_file = optarg;
00232 break;
00233 case 'q':
00234 pouthypm_file = optarg;
00235 break;
00236 case 's':
00237 ptime_fmt = optarg;
00238 break;
00239 case 'y':
00240 poutkwl_file = optarg;
00241 break;
00242 case 't':
00243 pkwlxsl_file = optarg;
00244 break;
00245 case 'f':
00246 process_fea = true;
00247 break;
00248 case 'a':
00249 pidx_ext = optarg;
00250 break;
00251 case 'b':
00252 pfea_ext = optarg;
00253 break;
00254 case 'v':
00255 gErrorHandler.SetVerbose(true);
00256 break;
00257 case 'g':
00258 pnetwork_file = optarg;
00259 break;
00260 case 'j':
00261 pronvariant_in_label = true;
00262 break;
00263 case '?':
00264 fprintf(stderr, "ERROR: Command line parsing error.\n");
00265 return 1;
00266 default :
00267 fprintf(stderr, "ERROR: Command line parsing error. Unexpected argument '%s'.\n", optarg);
00268 return 1;
00269 }
00270 }
00271
00272 if (!((pkwlxml_file == 0) ^ (pkwltxt_file == 0)))
00273 {
00274 fprintf(stderr, "ERROR: List of keywords must be given either in XML (-x) or plain text format (-k).\n");
00275 return 1;
00276 }
00277
00278
00279 SLicenseManagerI *plicman = BSAPIGetLicenseManager();
00280 if (plicman)
00281 {
00282 plicman->SetErrorHandler(&gErrorHandler);
00283 plicman->RegisterLicenseFile("license.dat");
00284 }
00285
00286 gHypTarget.SetTimeFormatStr(ptime_fmt);
00287 gDetTarget.SetTimeFormatStr(ptime_fmt);
00288
00289 SKeywordSpottingI *pkws = static_cast<SKeywordSpottingI *>(BSAPICreateInstance(SIID_KWS));
00290 if(!pkws)
00291 {
00292 return 1;
00293 }
00294
00295 pkws->SetErrorHandler(&gErrorHandler);
00296
00297 if (plicman)
00298 {
00299 long_long secs_to_expire = plicman->GetSecsToExpire(SIID_KWS);
00300 if (secs_to_expire < KWS_LIC_SECS_TO_EXPIRE_WARN)
00301 fprintf(stderr, "WARNING: License will expire in %d day(s).\n", static_cast<int>(secs_to_expire / (24 * 3600)));
00302 }
00303
00304
00305 char pdefault_cfg[1024];
00306 sprintf(pdefault_cfg, "settings%smain_config", DIRSEP);
00307 if(!pkws->Init((pconfig_file ? pconfig_file : pdefault_cfg)))
00308 {
00309 pkws->Release();
00310 return 1;
00311 }
00312
00313 SBlockSetI *pbset = pkws->GetBlockSet();
00314 if (!pbset)
00315 {
00316 pkws->Release();
00317 return 1;
00318 }
00319
00320 if (pnetwork_file)
00321 {
00322 SDecoderI *pdecoder = static_cast<SDecoderI *>(pbset->GetBlock("decoder"));
00323 if (!pdecoder)
00324 {
00325 pkws->Release();
00326 return 1;
00327 }
00328
00329 if (!pdecoder->LoadNetwork(pnetwork_file))
00330 {
00331 pkws->Release();
00332 return 1;
00333 }
00334 }
00335
00336 if (pronvariant_in_label) {
00337 gHypTarget.SetDoNotChangeLabel(true);
00338 gDetTarget.SetDoNotChangeLabel(true);
00339 }
00340
00341
00342 SWaveformFormatConvertorI *pwc = static_cast<SWaveformFormatConvertorI *>(pbset->GetBlock("waveform_convertor"));
00343 if (pwc)
00344 {
00345 pwc->SetNChannels(nchannels);
00346 pwc->SetInputFormatStr(pwave_fmt);
00347 }
00348
00349 if (pbset->Exists("data_node"))
00350 {
00351 SDataStreamNodeI *pdata_node = static_cast<SDataStreamNodeI *>(pbset->GetBlock("data_node"));
00352 if (pdata_node)
00353 {
00354 pdata_node->SetIndexFileSuffix(pidx_ext);
00355 pdata_node->SetFeatureFileSuffix(pfea_ext);
00356 }
00357 }
00358
00359
00360 SKeywordListI * mpKeywordList;
00361 mpKeywordList=pkws->GetKeywordList();
00362 if(!mpKeywordList)
00363 {
00364 pkws->Release();
00365 return 1;
00366 }
00367
00368 if(!mpKeywordList->Load(
00369 pkwlxml_file ? pkwlxml_file : pkwltxt_file,
00370 pkwlxml_file ? SKeywordListI::ffXML : SKeywordListI::ffPlain))
00371 {
00372 pkws->Release();
00373 return 1;
00374 }
00375
00376 if(!mpKeywordList->AddPronunciations(SKeywordListI::apmCondAdd))
00377 {
00378 pkws->Release();
00379 return 1;
00380 }
00381
00382 if(!mpKeywordList->CheckKeywords())
00383 {
00384 pkws->Release();
00385 return 1;
00386 }
00387
00388 if(poutkwl_file && !mpKeywordList->Save(poutkwl_file, pkwlxsl_file))
00389 {
00390 pkws->Release();
00391 return 1;
00392 }
00393
00394
00395 pkws->SetDetectionsTarget(&gDetTarget);
00396 pkws->SetHypothesesTarget(&gHypTarget);
00397
00398 gHypTarget.StartProcessing(pouthypm_file);
00399 gDetTarget.StartProcessing(poutdetm_file);
00400
00401
00402 if(pinput_file)
00403 {
00404 gErrorHandler.LogMessage("Processing file: %s", pinput_file);
00405
00406 gHypTarget.StartProcessingFile(pinput_file, pouthyp_file);
00407 gDetTarget.StartProcessingFile(pinput_file, poutdet_file);
00408 if(!pkws->ProcessFile(pinput_file, 0, 0, (!process_fea ? SKeywordSpottingI::kiWaveform : SKeywordSpottingI::kiPosteriors)))
00409 {
00410 pkws->Release();
00411 return 1;
00412 }
00413 gHypTarget.FinishProcessingFile();
00414 gDetTarget.FinishProcessingFile();
00415 }
00416
00417
00418 if(plist_file)
00419 {
00420 SFileListI *plist = static_cast<SFileListI *>(BSAPICreateInstance(SIID_FILELIST));
00421 if(!plist)
00422 {
00423 fprintf(stderr, "Memory allocation error.");
00424 pkws->Release();
00425 return 1;
00426 }
00427 plist->SetErrorHandler(&gErrorHandler);
00428 if(!plist->AddList(plist_file))
00429 {
00430 plist->Release();
00431 pkws->Release();
00432 return 1;
00433 }
00434
00435 plist->FirstLine();
00436 char ptarget[1024];
00437 char psource[1024];
00438 int start;
00439 int end;
00440 float prob;
00441 while(plist->GetLine(ptarget, psource, &start, &end, &prob))
00442 {
00443 gErrorHandler.LogMessage("Processing file: %s", psource);
00444
00445 std::string outhyp_file = change_file_suffix(psource, ".hyp");
00446 std::string outdet_file = change_file_suffix(psource, ".det");
00447
00448 if( !pouthyp_file || (pouthyp_file && strcmp(pouthyp_file,"none") != 0 && strcmp(pouthyp_file,"") != 0 )){
00449 gHypTarget.StartProcessingFile(psource, outhyp_file.c_str());
00450 }else{
00451 gHypTarget.StartProcessingFile(psource, 0);
00452 }
00453
00454 if( !poutdet_file || (poutdet_file && strcmp(poutdet_file,"none") != 0 && strcmp(poutdet_file,"") != 0 )){
00455 gDetTarget.StartProcessingFile(psource, outdet_file.c_str());
00456 }else{
00457 gDetTarget.StartProcessingFile(psource, 0);
00458 }
00459
00460 if(!pkws->ProcessFile(psource, 0, 0, (!process_fea ? SKeywordSpottingI::kiWaveform : SKeywordSpottingI::kiPosteriors)))
00461 {
00462 plist->Release();
00463 pkws->Release();
00464 return 1;
00465 }
00466 gHypTarget.FinishProcessingFile();
00467 gDetTarget.FinishProcessingFile();
00468 }
00469 plist->Release();
00470 }
00471
00472
00473
00474 if(pinput_dir)
00475 {
00476 SFileSnifferI *psniffer = static_cast<SFileSnifferI *>(BSAPICreateInstance(SIID_FILESNIFFER));
00477 if(!psniffer)
00478 {
00479 fprintf(stderr, "Memory allocation error.");
00480 pkws->Release();
00481 return 1;
00482 }
00483 psniffer->SetErrorHandler(&gErrorHandler);
00484 psniffer->AddDirectory(pinput_dir);
00485 psniffer->AddWantedSuffix(const_cast<char *>(pwave_ext));
00486
00487 if(!psniffer->FirstFile())
00488 {
00489 psniffer->Release();
00490 pkws->Release();
00491 return 1;
00492 }
00493
00494 char psource[1024];
00495 while(psniffer->GetFile(psource, sizeof(psource) - 1))
00496 {
00497 gErrorHandler.LogMessage("Processing file: %s", psource);
00498
00499 std::string outhyp_file = change_file_suffix(psource, ".hyp");
00500 std::string outdet_file = change_file_suffix(psource, ".det");
00501
00502 if( !pouthyp_file || (pouthyp_file && strcmp(pouthyp_file,"none") != 0 && strcmp(pouthyp_file,"") != 0 )){
00503 gHypTarget.StartProcessingFile(psource, outhyp_file.c_str());
00504 }else{
00505 gHypTarget.StartProcessingFile(psource, 0);
00506 }
00507
00508 if( !poutdet_file || (poutdet_file && strcmp(poutdet_file,"none") != 0 && strcmp(poutdet_file,"") != 0 )){
00509 gDetTarget.StartProcessingFile(psource, outdet_file.c_str());
00510 }else{
00511 gDetTarget.StartProcessingFile(psource, 0);
00512 }
00513
00514 if(!pkws->ProcessFile(psource, 0, 0, (!process_fea ? SKeywordSpottingI::kiWaveform : SKeywordSpottingI::kiPosteriors)))
00515 {
00516 psniffer->Release();
00517 pkws->Release();
00518 return 1;
00519 }
00520 gHypTarget.FinishProcessingFile();
00521 gDetTarget.FinishProcessingFile();
00522 }
00523 psniffer->Release();
00524 }
00525
00526 gHypTarget.FinishProcessing();
00527 gDetTarget.FinishProcessing();
00528
00529 pkws->Release();
00530 return 0;
00531 }