|
24 | 24 | #include "ift/encoder/glyph_segmentation.h" |
25 | 25 | #include "ift/encoder/merge_strategy.h" |
26 | 26 | #include "ift/encoder/subset_definition.h" |
| 27 | +#include "ift/freq/bigram_probability_calculator.h" |
27 | 28 | #include "ift/freq/unicode_frequencies.h" |
28 | 29 | #include "ift/proto/patch_encoding.h" |
29 | 30 | #include "ift/proto/patch_map.h" |
@@ -139,8 +140,7 @@ ABSL_FLAG(uint32_t, network_overhead_cost, 75, |
139 | 140 | "for each network request."); |
140 | 141 |
|
141 | 142 | // TODO(garretrieger): add additional setting for cost-based merging that |
142 | | -// configures a minimum |
143 | | -// grouping size (in terms of number of codepoints). |
| 143 | +// configures a minimum grouping size (in terms of number of codepoints). |
144 | 144 |
|
145 | 145 | ABSL_FLAG(std::vector<std::string>, optional_feature_tags, {}, |
146 | 146 | "A list of feature tags which can be optionally added to the font " |
@@ -168,6 +168,7 @@ using ift::encoder::GlyphSegmentation; |
168 | 168 | using ift::encoder::MergeStrategy; |
169 | 169 | using ift::encoder::Segment; |
170 | 170 | using ift::encoder::SubsetDefinition; |
| 171 | +using ift::freq::BigramProbabilityCalculator; |
171 | 172 | using ift::freq::UnicodeFrequencies; |
172 | 173 | using ift::proto::PatchEncoding; |
173 | 174 | using ift::proto::PatchMap; |
@@ -468,6 +469,63 @@ StatusOr<UnicodeFrequencies> GetFrequencyData( |
468 | 469 | return util::LoadFrequenciesFromRiegeli(frequency_data_file.c_str()); |
469 | 470 | } |
470 | 471 |
|
| 472 | +// Analysis of segmentation that does not utilize codepoint frequencies. |
| 473 | +static int NonFrequencyAnalysis(hb_face_t* font, |
| 474 | + const GlyphSegmentation& segmentation) { |
| 475 | + auto cost = SegmentationSize(font, segmentation); |
| 476 | + if (!cost.ok()) { |
| 477 | + std::cerr << "Failed to compute segmentation cost: " << cost.status() |
| 478 | + << std::endl; |
| 479 | + return -1; |
| 480 | + } |
| 481 | + auto ideal_cost = IdealSegmentationSize(font, segmentation, |
| 482 | + NumExclusivePatches(segmentation)); |
| 483 | + if (!ideal_cost.ok()) { |
| 484 | + std::cerr << "Failed to compute ideal segmentation cost: " << cost.status() |
| 485 | + << std::endl; |
| 486 | + return -1; |
| 487 | + } |
| 488 | + |
| 489 | + std::cerr << std::endl; |
| 490 | + std::cerr << "glyphs_in_fallback = " << segmentation.UnmappedGlyphs().size() |
| 491 | + << std::endl; |
| 492 | + std::cerr << "ideal_cost_bytes = " << *ideal_cost << std::endl; |
| 493 | + std::cerr << "total_cost_bytes = " << *cost << std::endl; |
| 494 | + |
| 495 | + double over_ideal_percent = |
| 496 | + (((double)*cost) / ((double)*ideal_cost) * 100.0) - 100.0; |
| 497 | + std::cerr << "%_extra_over_ideal = " << over_ideal_percent << std::endl; |
| 498 | + return 0; |
| 499 | +} |
| 500 | + |
| 501 | +static int AnalysisWithFrequency(hb_face_t* font, |
| 502 | + const GlyphSegmentation& segmentation) { |
| 503 | + auto freq_data = |
| 504 | + GetFrequencyData(absl::GetFlag(FLAGS_frequency_data_file), {}); |
| 505 | + if (!freq_data.ok()) { |
| 506 | + std::cerr << "Failed to load codepoint frequencies: " << freq_data.status() |
| 507 | + << std::endl; |
| 508 | + return -1; |
| 509 | + } |
| 510 | + |
| 511 | + BigramProbabilityCalculator calculator(std::move(*freq_data)); |
| 512 | + |
| 513 | + ClosureGlyphSegmenter segmenter; |
| 514 | + auto cost = segmenter.TotalCost(font, segmentation, calculator); |
| 515 | + if (!cost.ok()) { |
| 516 | + std::cerr << "Failed to compute cost of segmentation. " << cost.status() |
| 517 | + << std::endl; |
| 518 | + return -1; |
| 519 | + } |
| 520 | + |
| 521 | + std::cerr << "non_ift_cost_bytes = " << (uint64_t)cost->cost_for_non_segmented |
| 522 | + << std::endl; |
| 523 | + std::cerr << "total_cost_bytes = " << (uint64_t)cost->total_cost << std::endl; |
| 524 | + std::cerr << "ideal_cost_bytes = " << (uint64_t)cost->ideal_cost << std::endl; |
| 525 | + |
| 526 | + return 0; |
| 527 | +} |
| 528 | + |
471 | 529 | int main(int argc, char** argv) { |
472 | 530 | absl::SetStderrThreshold(absl::LogSeverityAtLeast::kInfo); |
473 | 531 | auto args = absl::ParseCommandLine(argc, argv); |
@@ -583,29 +641,9 @@ int main(int argc, char** argv) { |
583 | 641 | } |
584 | 642 |
|
585 | 643 | std::cerr << ">> Analysis" << std::endl; |
586 | | - auto cost = SegmentationSize(font->get(), *result); |
587 | | - if (!cost.ok()) { |
588 | | - std::cerr << "Failed to compute segmentation cost: " << cost.status() |
589 | | - << std::endl; |
590 | | - return -1; |
591 | | - } |
592 | | - auto ideal_cost = |
593 | | - IdealSegmentationSize(font->get(), *result, NumExclusivePatches(*result)); |
594 | | - if (!ideal_cost.ok()) { |
595 | | - std::cerr << "Failed to compute ideal segmentation cost: " << cost.status() |
596 | | - << std::endl; |
597 | | - return -1; |
| 644 | + if (FrequenciesAreRequired()) { |
| 645 | + return AnalysisWithFrequency(font->get(), *result); |
| 646 | + } else { |
| 647 | + return NonFrequencyAnalysis(font->get(), *result); |
598 | 648 | } |
599 | | - |
600 | | - std::cerr << std::endl; |
601 | | - std::cerr << "glyphs_in_fallback = " << result->UnmappedGlyphs().size() |
602 | | - << std::endl; |
603 | | - std::cerr << "ideal_cost_bytes = " << *ideal_cost << std::endl; |
604 | | - std::cerr << "total_cost_bytes = " << *cost << std::endl; |
605 | | - |
606 | | - double over_ideal_percent = |
607 | | - (((double)*cost) / ((double)*ideal_cost) * 100.0) - 100.0; |
608 | | - std::cerr << "%_extra_over_ideal = " << over_ideal_percent << std::endl; |
609 | | - |
610 | | - return 0; |
611 | 649 | } |
0 commit comments