gender_identification.cpp

This example shows how to use the SGenderIDI interface.

00001 /*********************************************************************
00002  *  Phonexia Gender Identification                                   *
00003  *                                                                   *
00004  *  Copyright   : (C) 2006-2008 by Petr Schwarz & Pavel Matejka      *
00005  *                         & Tomas Cipr                              *
00006  *                                                                   *
00007  *  Email       : {schwarz,matejka,cipr}@phonexia.com                *
00008  *********************************************************************/
00009 
00010 #include <stdio.h>
00011 #include <errno.h>
00012 #include <stdarg.h>
00013 #include <stdlib.h>
00014 #include <string.h>
00015 #include <assert.h>
00016 #include <math.h>
00017 #include <float.h>
00018 #include <limits.h>
00019 #include "bsapi.h"
00020 
00021 #ifdef WIN32
00022   #include "getopt.h"
00023   #define DIRSEP "\\"
00024 #else
00025   #include <unistd.h>
00026   #define DIRSEP "/"
00027 #endif
00028 
// Log-domain helper constants and default values of the command line options.
// Negative and expression-valued macros are parenthesized so that they expand
// safely inside any surrounding expression.
#define GID_LOG_0                  (-1.0e10)             // value used in place of log(0)
#define GID_LOG_MIN                (0.5 * GID_LOG_0)     // log_add() returns GID_LOG_0 below this
#define GID_MIN_LOG_DIFF           (log(DBL_EPSILON))    // log_add() drops the smaller term below this difference
#define GID_LOWER_SCORE_THRESHOLD  (-1e5)                // floor applied to the scaled log scores
#define GID_DEF_SCORE_SHARPNESS    4.0f                  // default of -s
#define GID_DEF_GENDER_BALANCE     0.0f                  // default of -b
#define GID_DEF_UNK_CLASS_THR      80.0f                 // default of -u
#define GID_DEF_NCHANNELS          1                     // default of -n
#define GID_UNKNOWN_CLASS_LETTER   'U'                   // one-letter label of the unknown class
#define GID_UNKNOWN_CLASS_NAME     "UNKNOWN"             // full label of the unknown class
00039 
// Selects how the input given on the command line is interpreted.
enum InputModeType
{
  GID_IMT_NONE = 0,  // no input option seen
  GID_IMT_FILE = 1,  // single input file (-i)
  GID_IMT_LIST = 2,  // list of input files (-l)
  GID_IMT_DIR  = 3   // input directory (-d)
};
00047 
// Indices into the model name / model score arrays used by this program.
// Index 0 is not used here; GID_GNDR_NGENDERS is the array length the
// program expects from the gender ID object.
enum GenderType
{
  GID_GNDR_FEMALE   = 1,
  GID_GNDR_MALE     = 2,
  GID_GNDR_NGENDERS = 3
};
00054 
00055 void help(const char *pProgName)
00056 { 
00057   puts("\n Gender Identification                                          ");
00058 printf(" %s\n", BSAPIVersion());
00059   puts(" ================================================================ ");
00060   puts("                                                                  ");
00061 printf(" USAGE: %s [options]\n", pProgName);
00062   puts("                                                                  ");
00063   puts(" options:                                                         ");
00064   puts("   -c file           configuration file                           ");
00065   puts("   -i file           input file                                   ");
00066   puts("   -l file           list of input files                          ");
00067   puts("   -d dir            input directory                              ");
00068   puts("   -e str  [alw,wav] extensions of input files (comma separated)  ");     
00069   puts("   -w fmt  [alaw]    waveform format (lin16, lin8, alaw, mulaw)   ");
00070   puts("   -n num  [1]       number of channels in audio files            ");
00071   puts("   -o file [stdout]  output score file                            ");
00072   puts("   -r                produce scores for both genders              ");
00073   puts("   -s num  [4.0]     score sharpness (positive number)            ");
00074   puts("   -b num  [0.0]     gender balance (-1.0, 1.0)                   ");
00075   puts("                        -1.0 ... 100% males                       ");
00076   puts("                         0.0 ... equally balanced                 ");
00077   puts("                         1.0 ... 100% females                     ");
00078 printf("   -u num  [80.0]    unknown class (%c) score thr. (50.0, 100.0)\n", GID_UNKNOWN_CLASS_LETTER);
00079   puts("                        50.0 ... unknown class off                ");
00080   puts("                       >50.0 ... unknown class if less than thr.  ");
00081   puts("   -v                verbose mode                                 ");
00082   puts("                                                                  ");
00083 }
00084 
00085 // This is an error handling object. If an error occur, a message is sent to 
00086 // this object at first. Then the running function will exit with the false 
00087 // or 0 return value. The object also accept warning and logging messages.
00088 class ErrorHandler : public SErrorCallbackI 
00089 {
00090   public:
00091     ErrorHandler() : mVerbose(false) {;}
00092     
00093     virtual void BSAPI_METHOD OnTextMessage(
00094       unsigned int iId, message_type type, unsigned int messageId, char *pMessage)
00095     {
00096       switch(type)
00097       {
00098         case mtError:
00099           fprintf(stderr, "ERROR: %s - %s\n", BSAPIInterfaceId2Text(iId), pMessage);
00100           break;
00101         case mtWarning:
00102           fprintf(stderr, "WARNING: %s - %s\n", BSAPIInterfaceId2Text(iId), pMessage);
00103           break;
00104         case mtLog:
00105           LogMessage(pMessage);
00106           break;
00107       }
00108     }
00109     
00110     void LogMessage(const char *pMessage, ...)
00111     {
00112       if (mVerbose)
00113       {
00114         va_list ap;
00115         va_start(ap, pMessage);
00116         vfprintf(stderr, pMessage, ap);
00117         fprintf(stderr, "\n");
00118         va_end(ap);
00119       }
00120     }
00121     
00122     void SetVerbose(bool verbose = true) {mVerbose = verbose;}
00123   
00124   protected:
00125     bool mVerbose;
00126 } gErrorHandler;
00127 
// Converts the whole string pStr to a float.
//   pStr   - NUL-terminated text to convert
//   pValue - receives the result of strtof() (also written on failure)
// Returns true when the complete string was a valid number. Returns false
// and prints an error to stderr when the string is empty, contains no
// number, has trailing characters, or strtof() reports a range error.
bool str2float(const char *pStr, float *pValue)
{
  char *pend_ptr = 0;

  errno = 0;
  *pValue = strtof(pStr, &pend_ptr);

  // pend_ptr == pStr means no conversion was performed at all (e.g. an empty
  // string); without this test "" would be accepted as 0.0.
  if (errno != 0 || pend_ptr == pStr || *pend_ptr != '\0')
  {
    fprintf(stderr,
      "ERROR: Can not convert string '%s' to floating point number.\n", pStr);
    return false;
  }

  return true;
}
00144 
00145 void print_output_header(FILE *pOutput, SGenderIDI *pGenderId)
00146 {
00147   int length = 0;
00148   char **ppnames = pGenderId->GetModelNames(&length);
00149   for (int i = 1; i < length; i++)
00150     fprintf(pOutput, "%s ", ppnames[i]);
00151   fprintf(pOutput, "\n");
00152 }
00153 
00154 double log_add(double x, double y)
00155 {
00156   double diff;
00157   
00158   if (x < y)
00159   {
00160     diff = x - y;
00161     x = y;
00162   }
00163   else
00164   {
00165     diff = y - x;
00166   }
00167   
00168   if (x < GID_LOG_MIN)
00169     return GID_LOG_0;
00170   
00171   if (diff < GID_MIN_LOG_DIFF)
00172     return  x;
00173   
00174   return x + log(1.0 + exp(diff));
00175 } 
00176 
00177 void log_like2post_prob(
00178     float& scoreFemale, 
00179     float& scoreMale, 
00180     float sharpness = 1.0f, 
00181     float balance = 0.0f)
00182 {
00183   balance = (balance + 1.0f) / 2.0f;
00184   scoreMale   = sharpness * scoreMale   + logf(1.0f - balance);
00185   scoreFemale = sharpness * scoreFemale + logf(balance);
00186   
00187   if (scoreMale < GID_LOWER_SCORE_THRESHOLD)
00188     scoreMale = GID_LOWER_SCORE_THRESHOLD;
00189     
00190   if (scoreFemale < GID_LOWER_SCORE_THRESHOLD)
00191     scoreFemale = GID_LOWER_SCORE_THRESHOLD;
00192   
00193   float sum = log_add(scoreMale, scoreFemale);
00194   
00195   scoreMale   -= sum;
00196   scoreFemale -= sum;
00197 
00198   // renormalization - in the first pass, the registers could have been in 
00199   // an arrea of saturation with very high rounding
00200   sum = log_add(scoreMale, scoreFemale);
00201 
00202   scoreMale   = expf(scoreMale   - sum) * 100.0f;
00203   scoreFemale = expf(scoreFemale - sum) * 100.0f;
00204 }
00205 
00206 bool process_file(char *pInputFile,
00207                   FILE *pOutput,
00208                   SGenderIDI *pGenderId,
00209                   float sharpness,
00210                   float balance,
00211                   float unkThreshold,
00212                   bool  dumpAllScores)
00213 {
00214   gErrorHandler.LogMessage("Processing file: %s", pInputFile);
00215   if (!pGenderId->TestFile(pInputFile))
00216     return false;
00217   
00218   fprintf(pOutput, "%s", pInputFile);
00219 
00220   int length = 0;
00221   char **ppnames = pGenderId->GetModelNames(&length);
00222   float *pscores = pGenderId->GetModelScores(&length);
00223   assert(pscores && ppnames && (GID_GNDR_NGENDERS == length));
00224   
00225   log_like2post_prob(
00226     pscores[GID_GNDR_FEMALE], 
00227     pscores[GID_GNDR_MALE], 
00228     sharpness, balance);
00229   
00230   int winner = (pscores[GID_GNDR_FEMALE] > pscores[GID_GNDR_MALE]) ? 
00231     GID_GNDR_FEMALE : GID_GNDR_MALE;
00232   
00233   if (dumpAllScores)
00234   {
00235     fprintf(pOutput, " %f %f %c\n", 
00236       pscores[GID_GNDR_FEMALE], 
00237       pscores[GID_GNDR_MALE],
00238       (pscores[winner] < unkThreshold) ? GID_UNKNOWN_CLASS_LETTER : ppnames[winner][0]);
00239   }
00240   else
00241   {
00242     if (pscores[winner] < unkThreshold)
00243       fprintf(pOutput, " %s %f %c\n", GID_UNKNOWN_CLASS_NAME, pscores[winner], ppnames[winner][0]);
00244     else
00245       fprintf(pOutput, " %s %f\n", ppnames[winner], pscores[winner]);
00246   }
00247 
00248   return true;
00249 }
00250 
00251 bool process_list(char *pInputList,
00252                   FILE *pOutput,
00253                   SGenderIDI *pGenderId,
00254                   float sharpness,
00255                   float balance,
00256                   float unkThreshold,
00257                   bool  dumpAllScores)
00258 {
00259   gErrorHandler.LogMessage("Processing list of files: %s", pInputList);
00260   SFileListI *plist = static_cast<SFileListI *>(BSAPICreateInstance(SIID_FILELIST));
00261   if (!plist)
00262   {
00263     fprintf(stderr, "ERROR: Memory allocation error.\n");
00264     return false;
00265   }
00266 
00267   plist->SetErrorHandler(&gErrorHandler);
00268   if (!plist->AddList(pInputList))
00269   {
00270     plist->Release();
00271     return false;
00272   }
00273   
00274   char ptarget[1024], psource[1024];
00275   int start, end;
00276   float prob;
00277   
00278   plist->FirstLine();      
00279   while (plist->GetLine(ptarget, psource, &start, &end, &prob))
00280   {
00281     process_file(psource, pOutput, pGenderId, sharpness, 
00282       balance, unkThreshold, dumpAllScores);
00283   }
00284   
00285   plist->Release();
00286   return true;
00287 }
00288 
00289 bool process_dir(char *pInputDir,
00290                  const char *pSuffixes,
00291                  FILE *pOutput,
00292                  SGenderIDI *pGenderId,
00293                  float sharpness,
00294                  float balance,
00295                  float unkThreshold,
00296                  bool  dumpAllScores)
00297 {
00298   gErrorHandler.LogMessage("Processing directory: %s", pInputDir);
00299   SFileSnifferI *pdir = static_cast<SFileSnifferI *>(BSAPICreateInstance(SIID_FILESNIFFER));
00300   if (!pdir)
00301   {
00302     fprintf(stderr, "ERROR: Memory allocation error.\n");
00303     return false;
00304   }
00305   
00306   pdir->SetErrorHandler(&gErrorHandler);
00307   if (!pdir->AddDirectory(pInputDir))
00308   {
00309     pdir->Release();
00310     return false;
00311   }
00312   
00313   char psuffixes[1024];
00314   psuffixes[sizeof(psuffixes) - 1] = '\0';
00315   strncpy(psuffixes, pSuffixes, sizeof(psuffixes) - 1);
00316   char *psuffix = strtok(psuffixes, ",; ");
00317   while (psuffix)
00318   {
00319     pdir->AddWantedSuffix(psuffix);
00320     psuffix = strtok(0, ",; ");
00321   }
00322 
00323   if (!pdir->FirstFile())
00324   {
00325     pdir->Release();
00326     return false;
00327   }
00328       
00329   char psource[1024];
00330   while(pdir->GetFile(psource, sizeof(psource) - 1))
00331   {
00332     process_file(psource, pOutput, pGenderId, sharpness, 
00333       balance, unkThreshold, dumpAllScores);
00334   }
00335   
00336   pdir->Release(); 
00337 }
00338 
00339 int main(int argc, char *argv[])
00340 {
00341   // init configuration variables
00342   InputModeType mode  = GID_IMT_NONE;
00343   char *pconfig_file  = 0;
00344   char *pinput_format = "alaw";
00345   char *pinput_ext    = "alw,wav";
00346   char *pinput        = 0;
00347   char *poutput_file  = 0;
00348   float sharpness     = GID_DEF_SCORE_SHARPNESS;
00349   float balance       = GID_DEF_GENDER_BALANCE;
00350   float unk_thr       = GID_DEF_UNK_CLASS_THR;
00351   int   nchannels     = GID_DEF_NCHANNELS;
00352   bool  dump_all_scores = false;
00353 
00354   // no input parameters given
00355   if(argc == 1)
00356   {
00357     help(argv[0]);
00358     return 0;
00359   }
00360 
00361   // command line parsing
00362   int c;
00363   optind = 0;
00364   while ((c = getopt(argc, argv, const_cast<char *>("c:i:l:d:e:w:n:o:rs:b:u:v"))) != -1)
00365   {
00366     switch(c)
00367     {
00368       case 'c': pconfig_file  = optarg; break;
00369       case 'i': pinput = optarg; mode = GID_IMT_FILE; break;
00370       case 'l': pinput = optarg; mode = GID_IMT_LIST; break;
00371       case 'd': pinput = optarg; mode = GID_IMT_DIR;  break;
00372       case 'e': pinput_ext    = optarg; break;
00373       case 'w': pinput_format = optarg; break;
00374       case 'o': poutput_file  = optarg; break;
00375       case 'r': dump_all_scores = true; break;
00376       case 's': if (!str2float(optarg, &sharpness)) return 1; break;
00377       case 'b': if (!str2float(optarg, &balance))   return 1; break;
00378       case 'u': if (!str2float(optarg, &unk_thr))   return 1; break;
00379       case 'n': nchannels = atoi(optarg);   break;
00380       case 'v': gErrorHandler.SetVerbose(); break;
00381       case '?':
00382         fprintf(stderr, "ERROR: Command line parsing error.\n");
00383         return 1;
00384     }
00385   }
00386   
00387   // check if the config file was set
00388   if (!pconfig_file)
00389   {
00390     fprintf(stderr, "ERROR: Configuration file was not set (-c).\n");
00391     return 1;
00392   }
00393 
00394   // check if either input file or list file was set
00395   if (GID_IMT_NONE == mode)
00396   {
00397     fprintf(stderr, "ERROR: Input file, list or directory must be set (-i | -l | -d).\n");
00398     return 1;
00399   }
00400 
00401   // check the number of channels
00402   if (nchannels < 1)
00403   {
00404     fprintf(stderr, "ERROR: Invalid number of channels (-n).\n");
00405     return 1;
00406   }
00407 
00408   // check if sharpness is in allowed interval
00409   if (sharpness < 0.0f)
00410   {
00411     fprintf(stderr, "ERROR: Wrong value of sharpness '%f'. "
00412       "Must be positive number. (-s)\n", sharpness);
00413     return 1;
00414   }
00415 
00416   // check if balance is in allowed interval
00417   if (balance < -1.0f || balance > 1.0f)
00418   {
00419     fprintf(stderr, "ERROR: Wrong value of balance '%f'. "
00420       "Must be in (-1.0, 1.0). (-b)\n", balance);
00421     return 1;
00422   }
00423 
00424   // check if unknown class threshold is in allowed interval
00425   if (unk_thr < 50.0f || unk_thr > 100.0f)
00426   {
00427     fprintf(stderr, "ERROR: Wrong value of unknown class score threshold '%f'. "
00428       "Must be in (50.0, 100.0). (-u)\n", unk_thr);
00429     return 1;
00430   }
00431 
00432   // register license file for SGenderIDI
00433   SLicenseManagerI *plicman = BSAPIGetLicenseManager();
00434   if (plicman)
00435   {
00436     plicman->SetErrorHandler(&gErrorHandler);
00437     plicman->RegisterLicenseFile("license.dat");
00438   }
00439   
00440   // init gender ID
00441   SGenderIDI *pgender_id = static_cast<SGenderIDI *>(BSAPICreateInstance(SIID_GENDERID));
00442   if (!pgender_id)
00443   {
00444     return 1;
00445   }
00446 
00447   pgender_id->SetErrorHandler(&gErrorHandler);
00448 
00449   if (!pgender_id->Init(pconfig_file))
00450   {
00451     pgender_id->Release();
00452     return 1;
00453   }
00454 
00455   // It is also possible to configure the waveform source.
00456   // If there are multiple channels, the channels are concatenated and just one decision made.
00457   SWaveformFormatConvertorI *pwc = pgender_id->GetWaveformFormatConvertor();
00458   if (pwc)
00459   {
00460     pwc->SetNChannels(nchannels);
00461     pwc->SetInputFormatStr(pinput_format);
00462   }
00463 
00464   // activate gender models before scoring
00465   if (!pgender_id->ActivateAllModels())
00466   {
00467     pgender_id->Release();
00468     return 1;
00469   }
00470 
00471   // check that both models are activated
00472   int nmodels = 0;
00473   pgender_id->GetModelNames(&nmodels);
00474   if (GID_GNDR_NGENDERS != nmodels)
00475   {
00476     fprintf(stderr, "ERROR: Some of gender models are missing.\n");
00477     pgender_id->Release();
00478     return 1;
00479   }
00480 
00481   // open output file
00482   FILE *poutput = poutput_file ? fopen(poutput_file, "w") : stdout;
00483   if (!poutput)
00484   {
00485     fprintf(stderr, "ERROR: Could not open output file '%s'.\n", poutput_file);
00486     pgender_id->Release();
00487     return 1;
00488   }
00489 
00490   // print output score file header
00491   if (dump_all_scores)
00492     print_output_header(poutput, pgender_id);
00493 
00494   // process file(s)
00495   bool processed = false;
00496   switch (mode)
00497   {
00498     case GID_IMT_FILE:
00499       processed = process_file(
00500         pinput, poutput, pgender_id, sharpness, balance, unk_thr, dump_all_scores);
00501       break;
00502       
00503     case GID_IMT_LIST:
00504       processed = process_list(
00505         pinput, poutput, pgender_id, sharpness, balance, unk_thr, dump_all_scores);
00506       break;
00507       
00508     case GID_IMT_DIR:
00509       processed = process_dir(
00510         pinput, pinput_ext, poutput, pgender_id, sharpness, balance, unk_thr, dump_all_scores);
00511       break;
00512       
00513     case GID_IMT_NONE:
00514       break;
00515   }
00516 
00517   // cleanup
00518   if (poutput_file)
00519     fclose(poutput);
00520 
00521   pgender_id->Release();
00522   return (processed ? 0 : 1);
00523 }

Generated on Wed Jul 15 10:10:04 2009 for BSAPI by  doxygen 1.4.7