00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <stdio.h>
00011 #include <errno.h>
00012 #include <stdarg.h>
00013 #include <stdlib.h>
00014 #include <string.h>
00015 #include <assert.h>
00016 #include <math.h>
00017 #include <float.h>
00018 #include <limits.h>
00019 #include "bsapi.h"
00020
00021 #ifdef _WIN32
00022 #include "getopt.h"
00023 #define DIRSEP "\\"
00024 #else
00025 #include <unistd.h>
00026 #define DIRSEP "/"
00027 #endif
00028
/* Log-domain stand-in for log(0); log_add() returns it for negligible sums. */
#define GID_LOG_0 (-1.0e10)
/* Operands below this value are treated by log_add() as log(0). */
#define GID_LOG_MIN (0.5 * GID_LOG_0)
/* Smallest log difference at which the smaller log_add() operand still counts. */
#define GID_MIN_LOG_DIFF (log(DBL_EPSILON))
/* Floor applied to the scaled log scores in log_like2post_prob(). */
#define GID_LOWER_SCORE_THRESHOLD (-1e5)
/* Default value of the -s option (score sharpness). */
#define GID_DEF_SCORE_SHARPNESS 4.0f
/* Default value of the -b option (gender balance). */
#define GID_DEF_GENDER_BALANCE 0.0f
/* Default value of the -u option (unknown class score threshold). */
#define GID_DEF_UNK_CLASS_THR 80.0f
/* Default value of the -n option (number of channels). */
#define GID_DEF_NCHANNELS 1
/* Letter printed for the unknown class when all scores are dumped (-r). */
#define GID_UNKNOWN_CLASS_LETTER 'U'
/* Class name printed for the unknown class in the default output format. */
#define GID_UNKNOWN_CLASS_NAME "UNKNOWN"
00039
/* Where the input comes from, as selected by the -i / -l / -d options. */
enum InputModeType
{
  GID_IMT_NONE = 0, /* no input option given yet */
  GID_IMT_FILE = 1, /* -i: single input file */
  GID_IMT_LIST = 2, /* -l: list of input files */
  GID_IMT_DIR = 3   /* -d: input directory */
};
00047
/* Indices into the score and name arrays used by process_file(). */
enum GenderType
{
  GID_GNDR_FEMALE = 0,
  GID_GNDR_MALE = 1,
  GID_GNDR_NGENDERS = 2 /* number of gender classes, not a class itself */
};
00054
00055 void help(const char *pProgName)
00056 {
00057 puts("\n Gender Identification ");
00058 printf(" %s\n", BSAPIVersion());
00059 puts(" ================================================================ ");
00060 puts(" ");
00061 printf(" USAGE: %s [options]\n", pProgName);
00062 puts(" ");
00063 puts(" options: ");
00064 puts(" -c file configuration file ");
00065 puts(" -i file input file ");
00066 puts(" -l file list of input files ");
00067 puts(" -d dir input directory ");
00068 puts(" -e str [alw,wav] extensions of input files (comma separated) ");
00069 puts(" -w fmt [alaw] waveform format (lin16, lin8, alaw, mulaw) ");
00070 puts(" -n num [1] number of channels in audio files ");
00071 puts(" -o file [stdout] output score file ");
00072 puts(" -r produce scores for both genders ");
00073 puts(" -s num [4.0] score sharpness (positive number) ");
00074 puts(" -b num [0.0] gender balance (-1.0, 1.0) ");
00075 puts(" -1.0 ... 100% males ");
00076 puts(" 0.0 ... equally balanced ");
00077 puts(" 1.0 ... 100% females ");
00078 printf(" -u num [80.0] unknown class (%c) score thr. (50.0, 100.0)\n", GID_UNKNOWN_CLASS_LETTER);
00079 puts(" 50.0 ... unknown class off ");
00080 puts(" >50.0 ... unknown class if less than thr. ");
00081 puts(" -v verbose mode ");
00082 puts(" ");
00083 }
00084
00085
00086
00087
00088 class ErrorHandler : public SErrorCallbackI
00089 {
00090 public:
00091 ErrorHandler() : mVerbose(false) {;}
00092
00093 virtual void BSAPI_METHOD OnTextMessage(
00094 SUnknownI *pSender, message_type type, unsigned int messageId, const char *pMessage)
00095 {
00096 unsigned int iid = pSender ? pSender->GetIID() : SIID_UNDEFINED;
00097 switch(type)
00098 {
00099 case mtError:
00100 fprintf(stderr, "ERROR: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00101 break;
00102 case mtWarning:
00103 fprintf(stderr, "WARNING: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00104 break;
00105 case mtLog:
00106 LogMessage(pMessage);
00107 break;
00108 }
00109 }
00110
00111 void LogMessage(const char *pMessage, ...)
00112 {
00113 if (mVerbose)
00114 {
00115 va_list ap;
00116 va_start(ap, pMessage);
00117 vfprintf(stderr, pMessage, ap);
00118 fprintf(stderr, "\n");
00119 va_end(ap);
00120 }
00121 }
00122
00123 void SetVerbose(bool verbose = true) {mVerbose = verbose;}
00124
00125 protected:
00126 bool mVerbose;
00127 } gErrorHandler;
00128
/*
 * Convert a complete string to a finite float.
 *
 * pStr   - text to convert; the whole string must be a number
 *          (leading whitespace is skipped by strtod).
 * pValue - receives the result; left untouched when conversion fails.
 *
 * Returns true on success. Returns false and prints an error to stderr
 * when the string is null or empty, contains no number, has trailing
 * characters, is out of range for strtod or for float, or is not finite.
 */
bool str2float(const char *pStr, float *pValue)
{
  char *pend_ptr = 0;
  double value = 0.0;
  bool ok = (pStr != 0);

  if (ok)
  {
    errno = 0;
    value = strtod(pStr, &pend_ptr);

    /* pend_ptr == pStr means no characters were consumed (e.g. "" or "abc"). */
    ok = (errno == 0) && (pend_ptr != pStr) && (*pend_ptr == '\0');

    /* Reject NaN (value != value), infinities and anything a float cannot hold. */
    if (ok && (value != value || value > FLT_MAX || value < -FLT_MAX))
      ok = false;
  }

  if (!ok)
  {
    fprintf(stderr,
      "ERROR: Can not convert string '%s' to floating point number.\n",
      pStr ? pStr : "(null)");
    return false;
  }

  *pValue = (float)value;
  return true;
}
00145
00146 double log_add(double x, double y)
00147 {
00148 double diff;
00149
00150 if (x < y)
00151 {
00152 diff = x - y;
00153 x = y;
00154 }
00155 else
00156 {
00157 diff = y - x;
00158 }
00159
00160 if (x < GID_LOG_MIN)
00161 return GID_LOG_0;
00162
00163 if (diff < GID_MIN_LOG_DIFF)
00164 return x;
00165
00166 return x + log(1.0 + exp(diff));
00167 }
00168
00169 void log_like2post_prob(
00170 float& scoreFemale,
00171 float& scoreMale,
00172 float sharpness = 1.0f,
00173 float balance = 0.0f)
00174 {
00175 balance = (balance + 1.0f) / 2.0f;
00176 scoreMale = sharpness * scoreMale + logf(1.0f - balance);
00177 scoreFemale = sharpness * scoreFemale + logf(balance);
00178
00179 if (scoreMale < GID_LOWER_SCORE_THRESHOLD)
00180 scoreMale = GID_LOWER_SCORE_THRESHOLD;
00181
00182 if (scoreFemale < GID_LOWER_SCORE_THRESHOLD)
00183 scoreFemale = GID_LOWER_SCORE_THRESHOLD;
00184
00185 float sum = log_add(scoreMale, scoreFemale);
00186
00187 scoreMale -= sum;
00188 scoreFemale -= sum;
00189
00190
00191
00192 sum = log_add(scoreMale, scoreFemale);
00193
00194 scoreMale = expf(scoreMale - sum) * 100.0f;
00195 scoreFemale = expf(scoreFemale - sum) * 100.0f;
00196 }
00197
00198 bool process_file(const char *pInputFile,
00199 FILE *pOutput,
00200 SGenderID2I *pGenderId,
00201 SScoreMemoryI *pScoreMemory,
00202 float sharpness,
00203 float balance,
00204 float unkThreshold,
00205 bool dumpAllScores)
00206 {
00207 static bool header_printed = false;
00208
00209 gErrorHandler.LogMessage("Processing file: %s", pInputFile);
00210 if (!pGenderId->TestFile(pInputFile))
00211 return false;
00212
00213 if (pScoreMemory->Empty())
00214 return false;
00215
00216 SScoresI *pscores_container = pScoreMemory->GetScores();
00217 assert(pscores_container);
00218 int length = pscores_container->GetNScores();
00219 char **ppnames = pscores_container->GetNames();
00220 float *pscores = pscores_container->GetScores();
00221
00222 assert(pscores && ppnames && (GID_GNDR_NGENDERS == length));
00223
00224 log_like2post_prob(
00225 pscores[GID_GNDR_FEMALE],
00226 pscores[GID_GNDR_MALE],
00227 sharpness, balance);
00228
00229 int winner = (pscores[GID_GNDR_FEMALE] > pscores[GID_GNDR_MALE]) ?
00230 GID_GNDR_FEMALE : GID_GNDR_MALE;
00231
00232 if (dumpAllScores)
00233 {
00234 if (!header_printed)
00235 {
00236
00237 for (int i = 0; i < length; i++)
00238 fprintf(pOutput, "%s ", ppnames[i]);
00239 fprintf(pOutput, "\n");
00240
00241 header_printed = true;
00242 }
00243
00244 fprintf(pOutput, "%s %f %f %c\n",
00245 pInputFile,
00246 pscores[GID_GNDR_FEMALE],
00247 pscores[GID_GNDR_MALE],
00248 (pscores[winner] < unkThreshold) ? GID_UNKNOWN_CLASS_LETTER : ppnames[winner][0]);
00249 }
00250 else
00251 {
00252 if (pscores[winner] < unkThreshold)
00253 fprintf(pOutput, "%s %s %f %c\n", pInputFile, GID_UNKNOWN_CLASS_NAME, pscores[winner], ppnames[winner][0]);
00254 else
00255 fprintf(pOutput, "%s %s %f\n", pInputFile, ppnames[winner], pscores[winner]);
00256 }
00257
00258 return true;
00259 }
00260
00261 bool process_list(const char *pInputList,
00262 FILE *pOutput,
00263 SGenderID2I *pGenderId,
00264 SScoreMemoryI *pScoreMemory,
00265 float sharpness,
00266 float balance,
00267 float unkThreshold,
00268 bool dumpAllScores)
00269 {
00270 gErrorHandler.LogMessage("Processing list of files: %s", pInputList);
00271 SFileListI *plist = static_cast<SFileListI *>(BSAPICreateInstance(SIID_FILELIST));
00272 if (!plist)
00273 {
00274 fprintf(stderr, "ERROR: Memory allocation error.\n");
00275 return false;
00276 }
00277
00278 plist->SetErrorHandler(&gErrorHandler);
00279 if (!plist->AddList(pInputList))
00280 {
00281 plist->Release();
00282 return false;
00283 }
00284
00285 char ptarget[1024], psource[1024];
00286 int start, end;
00287 float prob;
00288
00289 plist->FirstLine();
00290 while (plist->GetLine(ptarget, psource, &start, &end, &prob))
00291 {
00292 process_file(psource, pOutput, pGenderId, pScoreMemory, sharpness,
00293 balance, unkThreshold, dumpAllScores);
00294 }
00295
00296 plist->Release();
00297 return true;
00298 }
00299
00300 bool process_dir(const char *pInputDir,
00301 const char *pSuffixes,
00302 FILE *pOutput,
00303 SGenderID2I *pGenderId,
00304 SScoreMemoryI *pScoreMemory,
00305 float sharpness,
00306 float balance,
00307 float unkThreshold,
00308 bool dumpAllScores)
00309 {
00310 gErrorHandler.LogMessage("Processing directory: %s", pInputDir);
00311 SFileSnifferI *pdir = static_cast<SFileSnifferI *>(BSAPICreateInstance(SIID_FILESNIFFER));
00312 if (!pdir)
00313 {
00314 fprintf(stderr, "ERROR: Memory allocation error.\n");
00315 return false;
00316 }
00317
00318 pdir->SetErrorHandler(&gErrorHandler);
00319 if (!pdir->AddDirectory(pInputDir))
00320 {
00321 pdir->Release();
00322 return false;
00323 }
00324
00325 char psuffixes[1024];
00326 psuffixes[sizeof(psuffixes) - 1] = '\0';
00327 strncpy(psuffixes, pSuffixes, sizeof(psuffixes) - 1);
00328 char *psuffix = strtok(psuffixes, ",; ");
00329 while (psuffix)
00330 {
00331 pdir->AddWantedSuffix(psuffix);
00332 psuffix = strtok(0, ",; ");
00333 }
00334
00335 if (!pdir->FirstFile())
00336 {
00337 pdir->Release();
00338 return false;
00339 }
00340
00341 char psource[1024];
00342 while(pdir->GetFile(psource, sizeof(psource) - 1))
00343 {
00344 process_file(psource, pOutput, pGenderId, pScoreMemory, sharpness,
00345 balance, unkThreshold, dumpAllScores);
00346 }
00347
00348 pdir->Release();
00349 return true;
00350 }
00351
00352 int main(int argc, char *argv[])
00353 {
00354
00355 InputModeType mode = GID_IMT_NONE;
00356 const char *pconfig_file = 0;
00357 const char *pinput_format = "alaw";
00358 const char *pinput_ext = "alw,wav";
00359 const char *pinput = 0;
00360 const char *poutput_file = 0;
00361 float sharpness = GID_DEF_SCORE_SHARPNESS;
00362 float balance = GID_DEF_GENDER_BALANCE;
00363 float unk_thr = GID_DEF_UNK_CLASS_THR;
00364 int nchannels = GID_DEF_NCHANNELS;
00365 bool dump_all_scores = false;
00366
00367
00368 if(argc == 1)
00369 {
00370 help(argv[0]);
00371 return 0;
00372 }
00373
00374
00375 int c;
00376 optind = 0;
00377 while ((c = getopt(argc, argv, const_cast<char *>("c:i:l:d:e:w:n:o:rs:b:u:v"))) != -1)
00378 {
00379 switch(c)
00380 {
00381 case 'c': pconfig_file = optarg; break;
00382 case 'i': pinput = optarg; mode = GID_IMT_FILE; break;
00383 case 'l': pinput = optarg; mode = GID_IMT_LIST; break;
00384 case 'd': pinput = optarg; mode = GID_IMT_DIR; break;
00385 case 'e': pinput_ext = optarg; break;
00386 case 'w': pinput_format = optarg; break;
00387 case 'o': poutput_file = optarg; break;
00388 case 'r': dump_all_scores = true; break;
00389 case 's': if (!str2float(optarg, &sharpness)) return 1; break;
00390 case 'b': if (!str2float(optarg, &balance)) return 1; break;
00391 case 'u': if (!str2float(optarg, &unk_thr)) return 1; break;
00392 case 'n': nchannels = atoi(optarg); break;
00393 case 'v': gErrorHandler.SetVerbose(); break;
00394 case '?':
00395 fprintf(stderr, "ERROR: Command line parsing error.\n");
00396 return 1;
00397 }
00398 }
00399
00400
00401 if (!pconfig_file)
00402 {
00403 fprintf(stderr, "ERROR: Configuration file was not set (-c).\n");
00404 return 1;
00405 }
00406
00407
00408 if (GID_IMT_NONE == mode)
00409 {
00410 fprintf(stderr, "ERROR: Input file, list or directory must be set (-i | -l | -d).\n");
00411 return 1;
00412 }
00413
00414
00415 if (nchannels < 1)
00416 {
00417 fprintf(stderr, "ERROR: Invalid number of channels (-n).\n");
00418 return 1;
00419 }
00420
00421
00422 if (sharpness < 0.0f)
00423 {
00424 fprintf(stderr, "ERROR: Wrong value of sharpness '%f'. "
00425 "Must be positive number. (-s)\n", sharpness);
00426 return 1;
00427 }
00428
00429
00430 if (balance < -1.0f || balance > 1.0f)
00431 {
00432 fprintf(stderr, "ERROR: Wrong value of balance '%f'. "
00433 "Must be in (-1.0, 1.0). (-b)\n", balance);
00434 return 1;
00435 }
00436
00437
00438 if (unk_thr < 50.0f || unk_thr > 100.0f)
00439 {
00440 fprintf(stderr, "ERROR: Wrong value of unknown class score threshold '%f'. "
00441 "Must be in (50.0, 100.0). (-u)\n", unk_thr);
00442 return 1;
00443 }
00444
00445
00446 SLicenseManagerI *plicman = BSAPIGetLicenseManager();
00447 if (plicman)
00448 {
00449 plicman->SetErrorHandler(&gErrorHandler);
00450 plicman->RegisterLicenseFile("license.dat");
00451 }
00452
00453
00454 SGenderID2I *pgender_id = static_cast<SGenderID2I *>(BSAPICreateInstance(SIID_GENDERID2));
00455 if (!pgender_id)
00456 {
00457 return 1;
00458 }
00459
00460 pgender_id->SetErrorHandler(&gErrorHandler);
00461
00462 if (!pgender_id->Init(pconfig_file))
00463 {
00464 pgender_id->Release();
00465 return 1;
00466 }
00467
00468
00469
00470 SWaveformFormatConvertorI *pwc = pgender_id->GetWaveformFormatConvertor();
00471 if (pwc)
00472 {
00473 pwc->SetNChannels(nchannels);
00474 pwc->SetInputFormatStr(pinput_format);
00475 }
00476
00477
00478
00479
00480 SScoreMemoryI *pscore_mem = static_cast<SScoreMemoryI *>(BSAPICreateInstance(SIID_SCOREMEMORY));
00481 if(!pscore_mem)
00482 {
00483 fprintf(stderr, "ERROR: Memory allocation error.\n");
00484 pgender_id->Release();
00485 return 1;
00486 }
00487
00488 pscore_mem->SetErrorHandler(&gErrorHandler);
00489 pgender_id->SetTarget(pscore_mem);
00490
00491
00492 if (!pgender_id->ActivateAllModels())
00493 {
00494 pgender_id->Release();
00495 pscore_mem->Release();
00496 return 1;
00497 }
00498
00499
00500 SModelListI *pmodel_list = pgender_id->GetModelList();
00501 if (!pmodel_list)
00502 {
00503 pgender_id->Release();
00504 pscore_mem->Release();
00505 return 1;
00506 }
00507
00508 if (GID_GNDR_NGENDERS != pmodel_list->GetNModels())
00509 {
00510 fprintf(stderr, "ERROR: Some of gender models are missing.\n");
00511 pgender_id->Release();
00512 pscore_mem->Release();
00513 return 1;
00514 }
00515
00516
00517 FILE *poutput = poutput_file ? fopen(poutput_file, "w") : stdout;
00518 if (!poutput)
00519 {
00520 fprintf(stderr, "ERROR: Could not open output file '%s'.\n", poutput_file);
00521 pgender_id->Release();
00522 pscore_mem->Release();
00523 return 1;
00524 }
00525
00526
00527 bool processed = false;
00528 switch (mode)
00529 {
00530 case GID_IMT_FILE:
00531 processed = process_file(
00532 pinput, poutput, pgender_id, pscore_mem, sharpness, balance, unk_thr, dump_all_scores);
00533 break;
00534
00535 case GID_IMT_LIST:
00536 processed = process_list(
00537 pinput, poutput, pgender_id, pscore_mem, sharpness, balance, unk_thr, dump_all_scores);
00538 break;
00539
00540 case GID_IMT_DIR:
00541 processed = process_dir(
00542 pinput, pinput_ext, poutput, pgender_id, pscore_mem, sharpness, balance, unk_thr, dump_all_scores);
00543 break;
00544
00545 case GID_IMT_NONE:
00546 break;
00547 }
00548
00549
00550 if (poutput_file)
00551 fclose(poutput);
00552
00553 pgender_id->Release();
00554 pscore_mem->Release();
00555 return (processed ? 0 : 1);
00556 }