kws.cpp

This example shows how to use the SKeywordSpottingI interface.

00001 /*****************************************************************
00002  *  BSAPI Keyword Spotting Example                               *
00003  *                                                               *
00004  *  Author      : Petr Schwarz, Pavel Matejka, Tomas Cipr        *
00005  *  Copyright   : (C) 2006-2011 by Phonexia s.r.o                *
00006  *                                                               *
00007  *  For more info, please contact us at support@phonexia.com     *
00008  *****************************************************************/
00009 
00010 #include <stdio.h>
00011 #include <stdlib.h>
00012 #include <errno.h>
00013 #include <stdarg.h>
00014 #include <cstring>
00015 #include <string>
00016 #include <cassert>
00017 
00018 #include "bsapi.h"
00019 #include "getopt.h"
00020 #include "labtarget.h"
00021 
00022 #ifdef WIN32
00023   #define DIRSEP "\\"
00024 #else
00025   #define DIRSEP "/"
00026 #endif
00027 
00028 #define KWS_LIC_SECS_TO_EXPIRE_WARN  (10 * 24 * 3600)
00029 
00030 SLabelTarget gHypTarget;
00031 SLabelTarget gDetTarget;
00032 
00033 // This is an error handling object. If an error occur, a message is sent to this object at first. 
00034 // Then the running function will exit with the false or 0 return value. The object also accept 
00035 // warning and logging messages.
00036 class ErrorHandler : public SErrorCallbackI 
00037 {
00038   public:
00039     ErrorHandler() : mVerbose(false) {;}
00040     virtual void BSAPI_METHOD OnTextMessage(SUnknownI *pSender, message_type type, unsigned int messageId, const char *pMessage)
00041     {
00042       unsigned int iid = pSender ? pSender->GetIID() : SIID_UNDEFINED;
00043       switch(type)
00044       {
00045         case mtError:
00046           fprintf(stderr, "ERROR: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00047           break;
00048         case mtWarning:
00049           fprintf(stderr, "WARNING: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00050           break;
00051         case mtLog:
00052           LogMessage(pMessage);
00053           break;
00054       }
00055     }
00056 
00057     void LogMessage(const char *pMessage, ...)
00058     {
00059       if (mVerbose)
00060       {
00061         va_list ap;
00062         va_start(ap, pMessage);
00063         vfprintf(stdout, pMessage, ap);
00064         fprintf(stdout, "\n");
00065         va_end(ap);
00066       }
00067     }
00068     
00069     void SetVerbose(bool verbose) {mVerbose = verbose;}
00070   
00071   protected:
00072     bool mVerbose;
00073 } gErrorHandler;
00074 
// Returns pFilename with its extension replaced by pNewSuffix.
//
//   pFilename  - file name or path; '/' and '\\' are both treated as
//                directory separators. Must not be null.
//   pNewSuffix - new suffix including the leading dot (e.g. ".hyp").
//                Must not be null.
//
// The extension is everything from the last '.' onward, provided that dot
// lies in the last path component. If the last component has no dot (for
// example "dir.d/file"), the suffix is simply appended.
std::string change_file_suffix(const char *pFilename, const char *pNewSuffix)
{
  assert(pFilename);
  assert(pNewSuffix);

  std::string res = pFilename;
  const std::string::size_type pos_dot = res.rfind('.');
  const std::string::size_type pos_slash = res.find_last_of("/\\");

  const bool has_suffix =
    pos_dot != std::string::npos &&
    (pos_slash == std::string::npos || pos_dot > pos_slash);

  // Drop the whole old extension, whatever its length, so that no trailing
  // characters of a longer extension survive ("a.wave" -> "a.hyp").
  if (has_suffix)
    res.erase(pos_dot);

  res += pNewSuffix;
  return res;
}
00096 
00097 void help()
00098 { 
00099   puts("\n Keyword spotting                                               ");
00100   printf(" %s\n", BSAPIVersion());
00101   puts(" ================================================================ ");
00102   puts("                                                                  ");
00103   puts(" USAGE: kws_cmd [options]                                         ");
00104   puts("                                                                  ");
00105   puts(" system configuration:                                            ");
00106   puts("   -c file         configuration file                             ");
00107   puts("                                                                  ");
00108   puts(" processing one file:                                             ");
00109   puts("   -i file         input file                                     ");
00110   puts("   -o file         detection file                                 ");
00111   puts("   -h file         hypothesis file                                ");
00112   puts("                                                                  ");
00113   puts(" processing list of files:                                        ");
00114   puts("   -l file         list of input files                            ");
00115   puts("   -p file         detection master file (for all input files)    ");
00116   puts("   -q file         hypothesis master file (for all input files)   ");
00117   puts("   -o \"none\"       will not produce detection file per each file ");
00118   puts("   -h \"none\"       will not produce hypothesis file per each file");
00119   puts("                                                                  ");
00120   puts(" processing directory:                                            ");
00121   puts("   -d dir          input directory                                ");
00122   puts("   -e str [raw]    extension of audio files                       ");
00123   puts("   -p file         detection master file (for all input files)    ");
00124   puts("   -q file         hypothesis master file (for all input files)   ");
00125   puts("   -o \"none\"       will not produce detection file per each file ");
00126   puts("   -h \"none\"       will not produce hypothesis file per each file");
00127   puts("                                                                  ");
00128   puts(" processing features instead of waveform:                         ");
00129   puts("   -f              enable processing of features                  ");
00130   puts("   -a str [idx]    extension of index file                        ");
00131   puts("   -b str [fea]    extension of feature files                     ");
00132   puts("                                                                  ");
00133   puts(" NOTE: in case of -l or -d the output will be written             ");
00134   puts("       to *.hyp and *.det where * is the audio filename           ");
00135   puts("       without extension. Can be switched off by -o none, -h none ");
00136   puts("                                                                  ");
00137   puts(" other options:                                                   ");
00138   puts("   -v              verbose mode                                   ");
00139   puts("   -s fmt [mlf]    time format (seconds, mlf)                     ");
00140   puts("   -y file         list of keywords and pronunciations in XML     ");
00141   puts("   -t file         XSLT to be linked with the list of keywords    ");
00142   puts("   -w fmt [lin16]  waveform format (lin16, lin8, alaw, mulaw)     ");
00143   puts("   -n num [1]      number of channels in audio files, takes into  ");
00144   puts("                   account for non header files (.raw)            ");
00145   puts("   -x file         list of keywords in XML format                 ");
00146   puts("   -k file         list of keywords in plain text format          ");
00147   puts("   -g network_file load the kws net from this file instead of using netgen");
00148   puts("   -j              keep output label unchanged (with pronunciation variant)");
00149   puts("                                                                  ");
00150 }
00151 
00152 int main(int argc, char **argv)
00153 {
00154   // initial configuration variables
00155   char *pconfig_file   = 0;
00156   char *pinput_file    = 0;
00157   char *plist_file     = 0;
00158   char *pinput_dir     = 0;
00159   const char *pwave_ext = "raw";
00160   const char *pwave_fmt = "lin16";
00161   char *pkwlxml_file   = 0;
00162   char *pkwltxt_file   = 0;
00163   char *pkwlxsl_file   = 0;
00164   char *poutdet_file   = 0;
00165   char *pouthyp_file   = 0;
00166   char *poutdetm_file  = 0;
00167   char *pouthypm_file  = 0;
00168   const char *ptime_fmt = "mlf";
00169   char *poutkwl_file   = 0;
00170   char *pnetwork_file  = 0;
00171   int  nchannels       = 1;
00172   bool pronvariant_in_label = false;
00173   bool process_fea     = false;
00174   const char *pidx_ext = "idx";
00175   const char *pfea_ext = "fea";
00176 
00177   // command line parsing
00178   if(argc == 1)
00179   {
00180     help();
00181     return 0;
00182   }
00183 
00184   optind = 0;
00185   while (1)
00186   {
00187     int c = getopt(argc, argv, const_cast<char *>("-c:i:l:d:e:w:n:x:k:o:h:p:q:s:y:t:vg:jfa:b:"));
00188     if(c == -1)
00189       break;
00190 
00191     switch(c)
00192     {
00193       case 'c':
00194         pconfig_file = optarg;
00195         break;
00196       case 'i':
00197         pinput_file = optarg;
00198         break;
00199       case 'l':
00200         plist_file = optarg;
00201         break;
00202       case 'd':
00203         pinput_dir = optarg;
00204         break;
00205       case 'e':
00206         pwave_ext = optarg;
00207         break;
00208       case 'w':
00209         pwave_fmt = optarg;
00210         break;
00211      case 'n':
00212        if(sscanf(optarg, "%d", &nchannels) != 1 || nchannels < 1)
00213        {
00214          fprintf(stderr, "ERROR: Invalid number of channels: %s.\n", optarg);
00215          return 1;
00216        }
00217        break;
00218       case 'x':
00219         pkwlxml_file = optarg;
00220         break;
00221       case 'k':
00222         pkwltxt_file = optarg;
00223         break;
00224       case 'o':
00225         poutdet_file = optarg;
00226         break;
00227       case 'h':
00228         pouthyp_file = optarg;
00229         break;
00230       case 'p':
00231         poutdetm_file = optarg;
00232         break;
00233       case 'q':
00234         pouthypm_file = optarg;
00235         break;
00236       case 's':
00237         ptime_fmt = optarg;
00238         break;
00239       case 'y':
00240         poutkwl_file = optarg;
00241         break;
00242       case 't':
00243         pkwlxsl_file = optarg;
00244         break;
00245       case 'f':
00246         process_fea = true;
00247         break;
00248       case 'a':
00249         pidx_ext = optarg;
00250         break;
00251       case 'b':
00252         pfea_ext = optarg;
00253         break;
00254       case 'v':
00255         gErrorHandler.SetVerbose(true);
00256         break;
00257       case 'g':
00258         pnetwork_file = optarg;
00259         break;
00260       case 'j':
00261         pronvariant_in_label = true;
00262         break;
00263       case '?':
00264         fprintf(stderr, "ERROR: Command line parsing error.\n");
00265         return 1;
00266       default :
00267         fprintf(stderr, "ERROR: Command line parsing error. Unexpected argument '%s'.\n", optarg);
00268         return 1;
00269     }
00270   }
00271 
00272   if (!((pkwlxml_file == 0) ^ (pkwltxt_file == 0)))
00273   {
00274     fprintf(stderr, "ERROR: List of keywords must be given either in XML (-x) or plain text format (-k).\n");
00275     return 1;
00276   }
00277 
00278   // register license file for SKeywordSpottingI
00279   SLicenseManagerI *plicman = BSAPIGetLicenseManager();
00280   if (plicman)
00281   {
00282     plicman->SetErrorHandler(&gErrorHandler);
00283     plicman->RegisterLicenseFile("license.dat");
00284   }
00285 
00286   gHypTarget.SetTimeFormatStr(ptime_fmt);
00287   gDetTarget.SetTimeFormatStr(ptime_fmt);
00288 
00289   SKeywordSpottingI *pkws = static_cast<SKeywordSpottingI *>(BSAPICreateInstance(SIID_KWS));
00290   if(!pkws)
00291   {
00292     return 1;
00293   }
00294   
00295   pkws->SetErrorHandler(&gErrorHandler);
00296 
00297   if (plicman)
00298   {
00299     long_long secs_to_expire = plicman->GetSecsToExpire(SIID_KWS);
00300     if (secs_to_expire < KWS_LIC_SECS_TO_EXPIRE_WARN)
00301       fprintf(stderr, "WARNING: License will expire in %d day(s).\n", static_cast<int>(secs_to_expire / (24 * 3600)));
00302   }
00303 
00304   // read configuration file and configure the instance
00305   char pdefault_cfg[1024];
00306   sprintf(pdefault_cfg, "settings%smain_config", DIRSEP);  // Visual C does not have snprintf
00307   if(!pkws->Init((pconfig_file ? pconfig_file : pdefault_cfg)))
00308   {
00309     pkws->Release();
00310     return 1;
00311   }
00312 
00313   SBlockSetI *pbset = pkws->GetBlockSet();
00314   if (!pbset)
00315   {
00316     pkws->Release();
00317     return 1;
00318   }
00319 
00320   if (pnetwork_file)
00321   {
00322     SDecoderI *pdecoder = static_cast<SDecoderI *>(pbset->GetBlock("decoder"));
00323     if (!pdecoder)
00324     {
00325       pkws->Release();
00326       return 1;
00327     }
00328 
00329     if (!pdecoder->LoadNetwork(pnetwork_file))
00330     {
00331       pkws->Release();
00332       return 1;
00333     }
00334   }
00335 
00336   if (pronvariant_in_label) {
00337     gHypTarget.SetDoNotChangeLabel(true);
00338     gDetTarget.SetDoNotChangeLabel(true);
00339   }
00340 
00341   // It is also possible to configure the waveform source.
00342   SWaveformFormatConvertorI *pwc = static_cast<SWaveformFormatConvertorI *>(pbset->GetBlock("waveform_convertor"));
00343   if (pwc)
00344   {
00345     pwc->SetNChannels(nchannels);
00346     pwc->SetInputFormatStr(pwave_fmt);
00347   }
00348   
00349   if (pbset->Exists("data_node"))
00350   {
00351     SDataStreamNodeI *pdata_node = static_cast<SDataStreamNodeI *>(pbset->GetBlock("data_node"));
00352     if (pdata_node)
00353     {
00354       pdata_node->SetIndexFileSuffix(pidx_ext);
00355       pdata_node->SetFeatureFileSuffix(pfea_ext);
00356     }
00357   }
00358 
00359   // Prepare keyword list
00360   SKeywordListI * mpKeywordList;
00361   mpKeywordList=pkws->GetKeywordList();
00362   if(!mpKeywordList)
00363   {
00364     pkws->Release();
00365     return 1;
00366   }
00367   
00368   if(!mpKeywordList->Load(
00369     pkwlxml_file ? pkwlxml_file : pkwltxt_file,
00370     pkwlxml_file ? SKeywordListI::ffXML : SKeywordListI::ffPlain))
00371   {
00372     pkws->Release();
00373     return 1;
00374   }
00375   
00376   if(!mpKeywordList->AddPronunciations(SKeywordListI::apmCondAdd))
00377   {
00378     pkws->Release();
00379     return 1;
00380   }
00381   
00382   if(!mpKeywordList->CheckKeywords())
00383   {
00384     pkws->Release();
00385     return 1;
00386   }
00387   
00388   if(poutkwl_file && !mpKeywordList->Save(poutkwl_file, pkwlxsl_file))
00389   {
00390     pkws->Release();
00391     return 1;
00392   }
00393 
00394   // Set targets
00395   pkws->SetDetectionsTarget(&gDetTarget);
00396   pkws->SetHypothesesTarget(&gHypTarget);
00397 
00398   gHypTarget.StartProcessing(pouthypm_file);
00399   gDetTarget.StartProcessing(poutdetm_file);
00400 
00401   // the input is one file
00402   if(pinput_file)
00403   {
00404     gErrorHandler.LogMessage("Processing file: %s", pinput_file);
00405 
00406     gHypTarget.StartProcessingFile(pinput_file, pouthyp_file);
00407     gDetTarget.StartProcessingFile(pinput_file, poutdet_file);
00408     if(!pkws->ProcessFile(pinput_file, 0, 0, (!process_fea ? SKeywordSpottingI::kiWaveform : SKeywordSpottingI::kiPosteriors)))
00409     {
00410       pkws->Release();
00411       return 1;
00412     }
00413     gHypTarget.FinishProcessingFile();
00414     gDetTarget.FinishProcessingFile();
00415   }
00416 
00417   // the input is listfile
00418   if(plist_file)
00419   {
00420     SFileListI *plist = static_cast<SFileListI *>(BSAPICreateInstance(SIID_FILELIST));
00421     if(!plist)
00422     {
00423       fprintf(stderr, "Memory allocation error.");
00424       pkws->Release();
00425       return 1;
00426     }
00427     plist->SetErrorHandler(&gErrorHandler);
00428     if(!plist->AddList(plist_file))
00429     {
00430       plist->Release();
00431       pkws->Release();
00432       return 1;
00433     }
00434 
00435     plist->FirstLine();
00436     char ptarget[1024];
00437     char psource[1024];
00438     int start;
00439     int end;
00440     float prob;
00441     while(plist->GetLine(ptarget, psource, &start, &end, &prob))
00442     {
00443       gErrorHandler.LogMessage("Processing file: %s", psource);
00444 
00445       std::string outhyp_file = change_file_suffix(psource, ".hyp");
00446       std::string outdet_file = change_file_suffix(psource, ".det");
00447       
00448       if( !pouthyp_file || (pouthyp_file &&  strcmp(pouthyp_file,"none") != 0 &&  strcmp(pouthyp_file,"") != 0 )){
00449         gHypTarget.StartProcessingFile(psource, outhyp_file.c_str());
00450       }else{
00451         gHypTarget.StartProcessingFile(psource, 0);
00452       }
00453 
00454       if( !poutdet_file || (poutdet_file && strcmp(poutdet_file,"none") != 0 &&  strcmp(poutdet_file,"") != 0 )){
00455         gDetTarget.StartProcessingFile(psource, outdet_file.c_str());
00456       }else{
00457         gDetTarget.StartProcessingFile(psource, 0);
00458       }
00459 
00460       if(!pkws->ProcessFile(psource, 0, 0, (!process_fea ? SKeywordSpottingI::kiWaveform : SKeywordSpottingI::kiPosteriors)))
00461       {
00462         plist->Release();
00463         pkws->Release();
00464         return 1;
00465       }
00466       gHypTarget.FinishProcessingFile();
00467       gDetTarget.FinishProcessingFile();
00468     }
00469     plist->Release();
00470   }
00471 
00472   // the input is a directory
00473   // The FileSniffer can monitor a directory for incoming files. Here, the dictionary is searched just once.
00474   if(pinput_dir)
00475   {
00476     SFileSnifferI *psniffer = static_cast<SFileSnifferI *>(BSAPICreateInstance(SIID_FILESNIFFER));
00477     if(!psniffer)
00478     {
00479       fprintf(stderr, "Memory allocation error.");
00480       pkws->Release();
00481       return 1;
00482     }
00483     psniffer->SetErrorHandler(&gErrorHandler);
00484     psniffer->AddDirectory(pinput_dir);
00485     psniffer->AddWantedSuffix(const_cast<char *>(pwave_ext));
00486 
00487     if(!psniffer->FirstFile())
00488     {
00489       psniffer->Release();
00490       pkws->Release();
00491       return 1;
00492     }
00493 
00494     char psource[1024];
00495     while(psniffer->GetFile(psource, sizeof(psource) - 1))
00496     {
00497       gErrorHandler.LogMessage("Processing file: %s", psource);
00498 
00499       std::string outhyp_file = change_file_suffix(psource, ".hyp");
00500       std::string outdet_file = change_file_suffix(psource, ".det");
00501 
00502       if( !pouthyp_file || (pouthyp_file &&  strcmp(pouthyp_file,"none") != 0 &&  strcmp(pouthyp_file,"") != 0 )){
00503         gHypTarget.StartProcessingFile(psource, outhyp_file.c_str());
00504       }else{
00505         gHypTarget.StartProcessingFile(psource, 0);
00506       }
00507 
00508       if( !poutdet_file || (poutdet_file && strcmp(poutdet_file,"none") != 0 &&  strcmp(poutdet_file,"") != 0 )){
00509         gDetTarget.StartProcessingFile(psource, outdet_file.c_str());
00510       }else{
00511         gDetTarget.StartProcessingFile(psource, 0);
00512       }
00513 
00514       if(!pkws->ProcessFile(psource, 0, 0, (!process_fea ? SKeywordSpottingI::kiWaveform : SKeywordSpottingI::kiPosteriors)))
00515       {
00516         psniffer->Release();
00517         pkws->Release();
00518         return 1;
00519       }
00520       gHypTarget.FinishProcessingFile();
00521       gDetTarget.FinishProcessingFile();
00522     }
00523     psniffer->Release();
00524   }
00525 
00526   gHypTarget.FinishProcessing();
00527   gDetTarget.FinishProcessing();
00528 
00529   pkws->Release();
00530   return 0;
00531 }

Generated on Wed Apr 11 10:00:17 2012 for BSAPI by  doxygen 1.4.7