1 <?php
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
22
23 require_once(PHPMORPHY_DIR . '/gramtab.php');
24 require_once(PHPMORPHY_DIR . '/unicode.php');
25
26
27
28
/**
 * Query surface that every morphier in this file exposes.
 *
 * All methods take the word to analyse as their only argument. The interface
 * itself does not fix the return shapes; the null implementation
 * (phpMorphy_Morphier_Empty) answers every query with false.
 */
interface phpMorphy_Morphier_Interface
{
    /** Raw or decoded annotation data for $word. */
    public function getAnnot($word);

    /** Base form(s) of $word. */
    public function getBaseForm($word);

    /** Every form that can be produced for $word. */
    public function getAllForms($word);

    /** Pseudo root(s) of $word. */
    public function getPseudoRoot($word);

    /** Part(s) of speech for $word. */
    public function getPartOfSpeech($word);

    /** Descriptor object(s) for $word. */
    public function getWordDescriptor($word);

    /** Every form of $word together with its ancodes. */
    public function getAllFormsWithAncodes($word);

    /** Ancode(s) of $word. */
    public function getAncode($word);

    /** Grammar info of $word with the forms merged. */
    public function getGrammarInfoMergeForms($word);

    /** Grammar info of $word. */
    public function getGrammarInfo($word);
}
41
/**
 * Null-object morphier: every query answers false.
 *
 * Besides the interface methods it also provides getAllFormsWithGramInfo()
 * and castFormByGramInfo(), which likewise return false.
 */
class phpMorphy_Morphier_Empty implements phpMorphy_Morphier_Interface
{
    public function getAnnot($word)
    {
        return false;
    }

    public function getBaseForm($word)
    {
        return false;
    }

    public function getAllForms($word)
    {
        return false;
    }

    public function getAllFormsWithGramInfo($word)
    {
        return false;
    }

    public function getPseudoRoot($word)
    {
        return false;
    }

    public function getPartOfSpeech($word)
    {
        return false;
    }

    public function getWordDescriptor($word)
    {
        return false;
    }

    public function getAllFormsWithAncodes($word)
    {
        return false;
    }

    public function getAncode($word)
    {
        return false;
    }

    public function getGrammarInfoMergeForms($word)
    {
        return false;
    }

    public function getGrammarInfo($word)
    {
        return false;
    }

    public function castFormByGramInfo($word, $partOfSpeech, $grammems, $returnWords = false, $callback = null)
    {
        return false;
    }
}
56
57
58
59
/**
 * Turns a raw (packed) annotation string into an array of annotation records.
 */
interface phpMorphy_AnnotDecoder_Interface
{
    /**
     * @param string $annotsRaw packed annotation data
     * @param bool   $withBase  whether base prefix/suffix data should be decoded too
     */
    public function decode($annotsRaw, $withBase);
}
63
/**
 * Common decoding logic for packed annotation strings.
 *
 * Layout as read by decode(): a 4-byte little-endian record count ("V"),
 * followed by `count` fixed-size records described by getUnpackString() /
 * getUnpackBlockSize(), optionally followed by base prefix/suffix strings
 * separated by the $ends delimiter.
 */
abstract class phpMorphy_AnnotDecoder_Base implements phpMorphy_AnnotDecoder_Interface
{
    /** Value of 'common_ancode' that is translated to null while decoding. */
    const INVALID_ANCODE_ID = 0xFFFF;

    protected
        $ends,
        $unpack_str,
        $block_size;

    /**
     * @param string $ends delimiter used to split the base prefix/suffix tail
     */
    public function __construct($ends)
    {
        $this->ends = $ends;

        // Cache the subclass-provided record description once.
        $this->unpack_str = $this->getUnpackString();
        $this->block_size = $this->getUnpackBlockSize();
    }

    /** @return string unpack() format describing one record */
    abstract protected function getUnpackString();

    /** @return int size in bytes of one record */
    abstract protected function getUnpackBlockSize();

    /**
     * Decodes a packed annotation string into a list of records.
     *
     * @param string $annotRaw packed annotation data
     * @param bool   $withBase when true, 'base_prefix' and 'base_suffix' are
     *                         added to every record from the delimited tail
     * @return array list of records; the first one additionally carries 'count'
     * @throws phpMorphy_Exception when the input is empty or cannot be unpacked
     */
    public function decode($annotRaw, $withBase)
    {
        if(empty($annotRaw)) {
            throw new phpMorphy_Exception("Empty annot given");
        }

        $unpack_str = $this->unpack_str;
        $unpack_size = $this->block_size;

        // The first record is unpacked together with the leading count field.
        $first = unpack("Vcount/$unpack_str", $annotRaw);

        if(false === $first) {
            throw new phpMorphy_Exception("Invalid annot string '$annotRaw'");
        }

        if($first['common_ancode'] == self::INVALID_ANCODE_ID) {
            $first['common_ancode'] = null;
        }

        $count = $first['count'];
        $result = array($first);

        // Remaining records start right after the 4-byte count and the first record.
        for($i = 1; $i < $count; $i++) {
            $res = unpack(
                $unpack_str,
                $GLOBALS['__phpmorphy_substr']($annotRaw, 4 + $i * $unpack_size, $unpack_size)
            );

            // Truncated data must fail the same way as a broken first record
            // instead of silently yielding a `false` entry.
            if(false === $res) {
                throw new phpMorphy_Exception("Invalid annot string '$annotRaw'");
            }

            if($res['common_ancode'] == self::INVALID_ANCODE_ID) {
                $res['common_ancode'] = null;
            }

            $result[] = $res;
        }

        if($withBase) {
            // Tail holds alternating prefix/suffix strings, one pair per record.
            $items = explode(
                $this->ends,
                $GLOBALS['__phpmorphy_substr']($annotRaw, 4 + $count * $unpack_size)
            );

            for($i = 0; $i < $count; $i++) {
                $result[$i]['base_prefix'] = $items[$i * 2];
                $result[$i]['base_suffix'] = $items[$i * 2 + 1];
            }
        }

        return $result;
    }
}
126
/**
 * Decoder for the common annotation record: one 32-bit field followed by
 * nine 16-bit fields, 22 bytes in total.
 */
class phpMorphy_AnnotDecoder_Common extends phpMorphy_AnnotDecoder_Base
{
    /** @return string unpack() format of a single record */
    protected function getUnpackString()
    {
        $format = 'Voffset/vcplen/vplen/vflen/vcommon_ancode/vforms_count/vpacked_forms_count/vaffixes_size/vform_no/vpos_id';

        return $format;
    }

    /** @return int record size in bytes (4 + 9 * 2) */
    protected function getUnpackBlockSize()
    {
        $bytes = 22;

        return $bytes;
    }
}
137
/**
 * Decoder for prediction records: the common record plus a trailing
 * 16-bit 'freq' field.
 */
class phpMorphy_AnnotDecoder_Predict extends phpMorphy_AnnotDecoder_Common
{
    /** @return string parent format extended with the 'freq' field */
    protected function getUnpackString()
    {
        $base = parent::getUnpackString();

        return $base . '/vfreq';
    }

    /** @return int parent record size plus two bytes for 'freq' */
    protected function getUnpackBlockSize()
    {
        $base = parent::getUnpackBlockSize();

        return $base + 2;
    }
}
148
/**
 * Per-delimiter registry of annotation decoders.
 *
 * One factory exists per end-of-string value (see create()); each factory
 * lazily builds and then reuses a single "common" and a single "predict"
 * decoder.
 */
class phpMorphy_AnnotDecoder_Factory
{
    /** Factories already created, keyed by end-of-string value. */
    protected static $instances = array();

    protected $cache_common;
    protected $cache_predict;
    protected $eos;

    /** Use create(); the constructor is not publicly callable. */
    protected function __construct($eos)
    {
        $this->eos = $eos;
    }

    /**
     * Returns the factory bound to $eos, creating it on first request.
     */
    public static function create($eos)
    {
        if (isset(self::$instances[$eos])) {
            return self::$instances[$eos];
        }

        $factory = new phpMorphy_AnnotDecoder_Factory($eos);
        self::$instances[$eos] = $factory;

        return self::$instances[$eos];
    }

    /** Shared phpMorphy_AnnotDecoder_Common instance. */
    public function getCommonDecoder()
    {
        if (isset($this->cache_common)) {
            return $this->cache_common;
        }

        $this->cache_common = $this->instantinate('common');

        return $this->cache_common;
    }

    /** Shared phpMorphy_AnnotDecoder_Predict instance. */
    public function getPredictDecoder()
    {
        if (isset($this->cache_predict)) {
            return $this->cache_predict;
        }

        $this->cache_predict = $this->instantinate('predict');

        return $this->cache_predict;
    }

    /**
     * Builds the decoder class named after $type ("common" -> ..._Common),
     * passing the end-of-string value to its constructor.
     */
    protected function instantinate($type)
    {
        $lowered = $GLOBALS['__phpmorphy_strtolower']($type);
        $clazz = 'phpMorphy_AnnotDecoder_' . ucfirst($lowered);

        return new $clazz($this->eos);
    }
}
191
/**
 * Two-way mapping between ancode ids and their external representation.
 */
interface phpMorphy_AncodesResolver_Interface
{
    /** Converts an ancode id to its external representation. */
    public function resolve($ancodeId);

    /** Converts an external representation back to an ancode id. */
    public function unresolve($ancode);
}
196
/**
 * Lazy wrapper around another ancodes resolver.
 *
 * The wrapped resolver is only constructed (via reflection, from the class
 * name and constructor arguments given here) the first time resolve() or
 * unresolve() is called.
 *
 * The resolver is kept in a declared property instead of the dynamically
 * created `__obj` property, because creating dynamic properties is deprecated
 * as of PHP 8.2. Reading `$proxy->__obj` is still supported through __get().
 */
class phpMorphy_AncodesResolver_Proxy implements phpMorphy_AncodesResolver_Interface
{
    protected
        $args,
        $class,
        $instance;

    /**
     * @param string $class    name of the resolver class to instantiate lazily
     * @param array  $ctorArgs constructor arguments for that class
     */
    public function __construct($class, $ctorArgs)
    {
        $this->class = $class;
        $this->args = $ctorArgs;
    }

    public function unresolve($ancode)
    {
        return $this->getInstance()->unresolve($ancode);
    }

    public function resolve($ancodeId)
    {
        return $this->getInstance()->resolve($ancodeId);
    }

    /**
     * Creates an object of $class, passing $args to its constructor.
     */
    public static function instantinate($class, $args)
    {
        $ref = new ReflectionClass($class);
        return $ref->newInstanceArgs($args);
    }

    /**
     * Returns the wrapped resolver, building it on first use.
     */
    protected function getInstance()
    {
        if(!isset($this->instance)) {
            // Called through $this so that an overridden instantinate() is honoured.
            $this->instance = $this->instantinate($this->class, $this->args);

            // Construction data is not needed once the object exists.
            $this->args = null;
            $this->class = null;
        }

        return $this->instance;
    }

    /**
     * Backward-compatible access to the wrapped resolver as `__obj`.
     *
     * @throws phpMorphy_Exception for any other property name
     */
    public function __get($propName)
    {
        if($propName === '__obj') {
            return $this->getInstance();
        }

        throw new phpMorphy_Exception("Unknown '$propName' property");
    }
}
234
/**
 * Resolver that delegates to the gramtab's string conversion methods.
 */
class phpMorphy_AncodesResolver_ToText implements phpMorphy_AncodesResolver_Interface
{
    protected $gramtab;

    public function __construct(phpMorphy_GramTab_Interface $gramtab)
    {
        $this->gramtab = $gramtab;
    }

    /**
     * Returns the gramtab's string for $ancodeId; null is passed through.
     */
    public function resolve($ancodeId)
    {
        if (isset($ancodeId)) {
            return $this->gramtab->ancodeToString($ancodeId);
        }

        return null;
    }

    /**
     * Returns the gramtab's ancode for the given string.
     */
    public function unresolve($ancode)
    {
        $id = $this->gramtab->stringToAncode($ancode);

        return $id;
    }
}
255
/**
 * Resolver backed by a serialized id => ancode map read from storage.
 */
class phpMorphy_AncodesResolver_ToDialingAncodes implements phpMorphy_AncodesResolver_Interface
{
    protected
        $ancodes_map,
        $reverse_map;

    /**
     * Loads the whole storage content and unserializes it into the map.
     *
     * NOTE(review): unserialize() must only ever see trusted dictionary
     * files; if the storage can come from an untrusted source this needs a
     * safer format.
     *
     * @throws phpMorphy_Exception when the content does not unserialize to an array
     */
    public function __construct(phpMorphy_Storage $ancodesMap)
    {
        $map = unserialize($ancodesMap->read(0, $ancodesMap->getFileSize()));

        // Reject both unserialize failures (false) and any non-array payload,
        // which would otherwise break array_flip() below.
        if(!is_array($map)) {
            throw new phpMorphy_Exception("Can`t open phpMorphy => Dialing ancodes map");
        }

        $this->ancodes_map = $map;
        $this->reverse_map = array_flip($map);
    }

    /**
     * Maps an ancode back to its id; null is passed through.
     *
     * @throws phpMorphy_Exception when the ancode is not in the map
     */
    public function unresolve($ancode)
    {
        if(!isset($ancode)) {
            return null;
        }

        if(!isset($this->reverse_map[$ancode])) {
            throw new phpMorphy_Exception("Unknwon ancode found '$ancode'");
        }

        return $this->reverse_map[$ancode];
    }

    /**
     * Maps an ancode id to its ancode; null is passed through.
     *
     * @throws phpMorphy_Exception when the id is not in the map
     */
    public function resolve($ancodeId)
    {
        if(!isset($ancodeId)) {
            return null;
        }

        if(!isset($this->ancodes_map[$ancodeId])) {
            throw new phpMorphy_Exception("Unknwon ancode id found '$ancodeId'");
        }

        return $this->ancodes_map[$ancodeId];
    }
}
293
/**
 * Identity resolver: ids and ancodes are returned unchanged.
 */
class phpMorphy_AncodesResolver_AsIs implements phpMorphy_AncodesResolver_Interface
{
    /** Nothing to set up. */
    public function __construct()
    {
    }

    /** @return mixed the id that was passed in */
    public function resolve($ancodeId)
    {
        $same = $ancodeId;

        return $same;
    }

    /** @return mixed the ancode that was passed in */
    public function unresolve($ancode)
    {
        $same = $ancode;

        return $same;
    }
}
307
308
309
310
/**
 * Turns annotations of a word into word-level results (base forms, all
 * forms, ancodes, grammar info, descriptors).
 *
 * Every public query accepts either raw annotation data or an already decoded
 * array (see decodeAnnot()) and returns false when the annotations are false.
 * String slicing goes through the function stored in
 * $GLOBALS['__phpmorphy_substr'].
 */
class phpMorphy_Morphier_Helper
{
    protected
        $graminfo,
        $annot_decoder,
        $char_size,
        $ends,
        $gramtab,
        $ancodes_resolver,
        $gramtab_consts_included = false,
        $resolve_pos;

    /**
     * @param phpMorphy_GramInfo_Interace         $graminfo            source of flexia/ancode data
     * @param phpMorphy_GramTab_Interface         $gramtab             grammem / part-of-speech lookup
     * @param phpMorphy_AncodesResolver_Interface $ancodesResolver     converts ancode ids for output
     * @param mixed                               $resolvePartOfSpeech cast to bool; when true, part-of-speech
     *                                                                 ids are resolved through the gramtab
     */
    public function __construct(
        phpMorphy_GramInfo_Interace $graminfo,
        phpMorphy_GramTab_Interface $gramtab,
        phpMorphy_AncodesResolver_Interface $ancodesResolver,
        $resolvePartOfSpeech
    ) {
        $this->graminfo = $graminfo;
        $this->gramtab = $gramtab;
        $this->resolve_pos = (bool)$resolvePartOfSpeech;
        $this->ancodes_resolver = $ancodesResolver;

        $this->char_size = $graminfo->getCharSize();

        $this->ends = $graminfo->getEnds();
    }

    /** Sets the decoder used by decodeAnnot() for raw annotation data. */
    public function setAnnotDecoder(phpMorphy_AnnotDecoder_Interface $annotDecoder)
    {
        $this->annot_decoder = $annotDecoder;
    }

    /** @return mixed end-of-string value reported by the graminfo */
    public function getEndOfString()
    {
        return $this->ends;
    }

    /** @return mixed char size reported by the graminfo */
    public function getCharSize()
    {
        return $this->char_size;
    }

    /** @return bool whether setAnnotDecoder() has been called with a decoder */
    public function hasAnnotDecoder()
    {
        return isset($this->annot_decoder);
    }

    public function getAnnotDecoder()
    {
        return $this->annot_decoder;
    }

    public function getAncodesResolver()
    {
        return $this->ancodes_resolver;
    }

    public function getGramInfo()
    {
        return $this->graminfo;
    }

    public function getGramTab()
    {
        return $this->gramtab;
    }

    /** @return bool the flag given to the constructor */
    public function isResolvePartOfSpeech()
    {
        return $this->resolve_pos;
    }

    /** Resolves a part-of-speech id through the gramtab. */
    public function resolvePartOfSpeech($posId)
    {
        return $this->gramtab->resolvePartOfSpeechId($posId);
    }

    /** Grammems of an ancode, as returned by the gramtab. */
    public function getGrammems($ancodeId)
    {
        return $this->gramtab->getGrammems($ancodeId);
    }

    /**
     * @return array two-element list: [part of speech, grammems] of the ancode
     */
    public function getGrammemsAndPartOfSpeech($ancodeId)
    {
        return array(
            $this->gramtab->getPartOfSpeech($ancodeId),
            $this->gramtab->getGrammems($ancodeId)
        );
    }

    /**
     * Returns the annotation's part of speech: the resolved value when
     * part-of-speech resolving is enabled, the raw 'pos_id' otherwise.
     *
     * The method name was missing in the declaration although
     * getPartOfSpeech() calls it by this name.
     */
    public function extractPartOfSpeech($annot)
    {
        if($this->resolve_pos) {
            return $this->resolvePartOfSpeech($annot['pos_id']);
        }

        return $annot['pos_id'];
    }

    /**
     * Lets the gramtab include its constants (only when part-of-speech
     * resolving is enabled) and remembers that this step has been done.
     */
    protected function includeGramTabConsts()
    {
        if($this->isResolvePartOfSpeech()) {
            $this->gramtab->includeConsts();
        }

        $this->gramtab_consts_included = true;
    }

    /**
     * Builds a descriptor collection for the word.
     *
     * @return phpMorphy_WordDescriptor_Collection
     */
    public function getWordDescriptor($word, $annots)
    {
        if(!$this->gramtab_consts_included) {
            $this->includeGramTabConsts();
        }

        return new phpMorphy_WordDescriptor_Collection($word, $annots, $this);
    }

    /**
     * Cuts the word into base and prefix.
     *
     * The base is the word without its first ($cplen + $plen) units and its
     * last $flen units; the prefix is the first $cplen units (or '').
     *
     * @return array [base, prefix]
     */
    protected function getBaseAndPrefix($word, $cplen, $plen, $flen)
    {
        if($flen) {
            $base = $GLOBALS['__phpmorphy_substr']($word, $cplen + $plen, -$flen);
        } else {
            if($cplen || $plen) {
                $base = $GLOBALS['__phpmorphy_substr']($word, $cplen + $plen);
            } else {
                $base = $word;
            }
        }

        $prefix = $cplen ? $GLOBALS['__phpmorphy_substr']($word, 0, $cplen) : '';

        return array($base, $prefix);
    }

    /**
     * @return array|false unique parts of speech over all annotations
     */
    public function getPartOfSpeech($word, $annots)
    {
        if(false === $annots) {
            return false;
        }

        $result = array();

        // Array keys are used to de-duplicate.
        foreach($this->decodeAnnot($annots, false) as $annot) {
            $result[$this->extractPartOfSpeech($annot)] = 1;
        }

        return array_keys($result);
    }

    /**
     * @return array|false unique base forms of the word
     */
    public function getBaseForm($word, $annots)
    {
        if(false === $annots) {
            return false;
        }

        $annots = $this->decodeAnnot($annots, true);

        return $this->composeBaseForms($word, $annots);
    }

    /**
     * @return array|false unique bases (see getBaseAndPrefix()) over all annotations
     */
    public function getPseudoRoot($word, $annots)
    {
        if(false === $annots) {
            return false;
        }

        $annots = $this->decodeAnnot($annots, false);

        $result = array();

        foreach($annots as $annot) {
            list($base) = $this->getBaseAndPrefix(
                $word,
                $annot['cplen'],
                $annot['plen'],
                $annot['flen']
            );

            $result[$base] = 1;
        }

        return array_keys($result);
    }

    /**
     * @return array|false unique forms composed from every annotation's flexias
     */
    public function getAllForms($word, $annots)
    {
        if(false === $annots) {
            return false;
        }

        $annots = $this->decodeAnnot($annots, false);

        return $this->composeForms($word, $annots);
    }

    /**
     * Lists the forms of the word that match a grammatical filter.
     *
     * When $callback is given it alone decides (truthy return keeps the form)
     * and receives ($form, $pos, $grammems, $formNo). Otherwise a form is kept
     * when its part of speech is identical to $partOfSpeech (if that is set)
     * and it carries every grammem listed in $grammems.
     *
     * @param mixed         $partOfSpeech cast to string when set
     * @param mixed         $grammems     cast to array
     * @param bool          $returnWords  true: unique form strings; false: arrays with
     *                                    'form', 'form_no', 'pos' and 'grammems'
     * @param callable|null $callback
     * @return array|false
     * @throws phpMorphy_Exception when $callback is set but not callable
     */
    public function castFormByGramInfo($word, $annots, $partOfSpeech, $grammems, $returnWords = false, $callback = null)
    {
        if(false === $annots) {
            return false;
        }

        if(isset($callback) && !is_callable($callback)) {
            throw new phpMorphy_Exception("Invalid callback given");
        }

        $result = array();
        $grammems = (array)$grammems;
        $partOfSpeech = isset($partOfSpeech) ? (string)$partOfSpeech : null;

        foreach($this->decodeAnnot($annots, false) as $annot) {
            $all_ancodes = $this->graminfo->readAncodes($annot);
            $flexias = $this->graminfo->readFlexiaData($annot);
            $common_ancode = $annot['common_ancode'];
            $common_grammems = isset($common_ancode) ? $this->gramtab->getGrammems($common_ancode) : array();

            list($base, $prefix) = $this->getBaseAndPrefix(
                $word,
                $annot['cplen'],
                $annot['plen'],
                $annot['flen']
            );

            // $i walks the flexia pairs; $form_no counts every (form, ancode)
            // combination, whether or not it passes the filter.
            $i = 0;
            $form_no = 0;
            foreach($all_ancodes as $form_ancodes) {
                foreach($form_ancodes as $ancode) {
                    $current_form_no = $form_no++;

                    $form_pos = $this->gramtab->getPartOfSpeech($ancode);
                    $form_grammems = array_merge($this->gramtab->getGrammems($ancode), $common_grammems);
                    $form = $prefix . $flexias[$i] . $base . $flexias[$i + 1];

                    if(isset($callback)) {
                        if(!call_user_func($callback, $form, $form_pos, $form_grammems, $current_form_no)) {
                            continue;
                        }
                    } else {
                        if(isset($partOfSpeech) && $form_pos !== $partOfSpeech) {
                            continue;
                        }

                        if(count(array_diff($grammems, $form_grammems)) > 0) {
                            continue;
                        }
                    }

                    if($returnWords) {
                        $result[$form] = 1;
                    } else {
                        $result[] = array(
                            'form' => $form,
                            'form_no' => $current_form_no,
                            'pos' => $form_pos,
                            'grammems' => $form_grammems
                        );
                    }
                }

                $i += 2;
            }
        }

        return $returnWords ? array_keys($result) : $result;
    }

    /**
     * @return array|false unique entries with the resolved 'common' ancode and
     *                     'all' resolved ancodes of the annotation's own form
     */
    public function getAncode($annots)
    {
        if(false === $annots) {
            return false;
        }

        $result = array();

        foreach($this->decodeAnnot($annots, false) as $annot) {
            $all_ancodes = $this->graminfo->readAncodes($annot);

            $result[] = array(
                'common' => $this->ancodes_resolver->resolve($annot['common_ancode']),
                'all' => array_map(
                    array($this->ancodes_resolver, 'resolve'),
                    $all_ancodes[$annot['form_no']]
                )
            );
        }

        return $this->array_unique($result);
    }

    /**
     * array_unique() that also works for nested arrays.
     *
     * PHP before 5.2.9 has no SORT_REGULAR flag for array_unique(), so there
     * the elements are compared through their serialized form.
     */
    protected static function array_unique($array)
    {
        static $need_own;

        if(!isset($need_own)) {
            $need_own = -1 === version_compare(PHP_VERSION, '5.2.9');
        }

        if($need_own) {
            $result = array();

            foreach(array_keys(array_unique(array_map('serialize', $array))) as $key) {
                $result[$key] = $array[$key];
            }

            return $result;
        } else {
            return array_unique($array, SORT_REGULAR);
        }
    }

    /**
     * Grammar info per annotation with the grammems of all ancodes of the
     * annotation's form merged into one sorted, unique list.
     *
     * @return array|false unique entries with 'pos', 'grammems', 'forms_count',
     *                     'form_no_low' and 'form_no_high'
     */
    public function getGrammarInfoMergeForms($annots)
    {
        if(false === $annots) {
            return false;
        }

        $result = array();

        foreach($this->decodeAnnot($annots, false) as $annot) {
            $all_ancodes = $this->graminfo->readAncodes($annot);
            $common_ancode = $annot['common_ancode'];
            $grammems = isset($common_ancode) ? $this->gramtab->getGrammems($common_ancode) : array();

            $forms_count = 0;
            $form_no = $annot['form_no'];

            // Reset per annotation so that 'pos' below never reads an
            // undefined variable or an ancode left over from a previous
            // annotation when this form has no ancodes.
            $last_ancode = null;

            foreach($all_ancodes[$form_no] as $ancode) {
                $grammems = array_merge($grammems, $this->gramtab->getGrammems($ancode));
                $forms_count++;
                $last_ancode = $ancode;
            }

            $grammems = array_unique($grammems);
            sort($grammems);

            $result[] = array(
                // Part of speech is taken from the last ancode of the form.
                'pos' => $this->gramtab->getPartOfSpeech($last_ancode),
                'grammems' => $grammems,
                'forms_count' => $forms_count,
                'form_no_low' => $form_no,
                'form_no_high' => $form_no + $forms_count,
            );
        }

        return $this->array_unique($result);
    }

    /**
     * Grammar info per annotation, one item per ancode of the annotation's form.
     *
     * @return array|false unique lists of items with 'pos', sorted 'grammems'
     *                     and 'form_no'
     */
    public function getGrammarInfo($annots)
    {
        if(false === $annots) {
            return false;
        }

        $result = array();

        foreach($this->decodeAnnot($annots, false) as $annot) {
            $all_ancodes = $this->graminfo->readAncodes($annot);
            $common_ancode = $annot['common_ancode'];
            $common_grammems = isset($common_ancode) ? $this->gramtab->getGrammems($common_ancode) : array();

            $info = array();

            $form_no = $annot['form_no'];
            foreach($all_ancodes[$form_no] as $ancode) {
                $grammems = array_merge($common_grammems, $this->gramtab->getGrammems($ancode));

                sort($grammems);

                $info[] = array(
                    'pos' => $this->gramtab->getPartOfSpeech($ancode),
                    'grammems' => $grammems,
                    'form_no' => $form_no,
                );
            }

            $unique_info = $this->array_unique($info);
            sort($unique_info);
            $result[] = $unique_info;
        }

        return $this->array_unique($result);
    }

    /**
     * All forms with ancodes passed through the ancodes resolver.
     *
     * @param string $resolveType accepted for compatibility; not used here
     * @return array|false one entry per annotation with 'forms', 'common', 'all'
     */
    public function getAllFormsWithResolvedAncodes($word, $annots, $resolveType = 'no_resolve')
    {
        if(false === $annots) {
            return false;
        }

        $annots = $this->decodeAnnot($annots, false);

        return $this->composeFormsWithResolvedAncodes($word, $annots);
    }

    /**
     * All (form, ancode) pairs of the word.
     *
     * @param array $foundFormNo filled per annotation index with the 'low' and
     *                           'high' result positions of the annotation's own form
     * @return array|false
     */
    public function getAllFormsWithAncodes($word, $annots, &$foundFormNo = array())
    {
        if(false === $annots) {
            return false;
        }

        $annots = $this->decodeAnnot($annots, false);

        return $this->composeFormsWithAncodes($word, $annots, $foundFormNo);
    }

    /**
     * @return array|false the graminfo's ancodes for every annotation
     */
    public function getAllAncodes($word, $annots)
    {
        if(false === $annots) {
            return false;
        }

        $result = array();

        // Decode first, like every other query: raw annotation data cannot be
        // iterated. Already decoded arrays pass through unchanged.
        foreach($this->decodeAnnot($annots, false) as $annot) {
            $result[] = $this->graminfo->readAncodes($annot);
        }

        return $result;
    }

    /**
     * Builds the unique base forms from decoded annotations.
     *
     * An annotation with 'form_no' 0 yields the word itself; otherwise the
     * base form is prefix + 'base_prefix' + base + 'base_suffix'.
     */
    protected function composeBaseForms($word, $annots)
    {
        $result = array();

        foreach($annots as $annot) {
            if($annot['form_no'] > 0) {
                list($base, $prefix) = $this->getBaseAndPrefix(
                    $word,
                    $annot['cplen'],
                    $annot['plen'],
                    $annot['flen']
                );

                $result[$prefix . $annot['base_prefix'] . $base . $annot['base_suffix']] = 1;
            } else {
                $result[$word] = 1;
            }
        }

        return array_keys($result);
    }

    /**
     * Builds the unique forms from decoded annotations.
     *
     * Flexia data is read as consecutive pairs; each pair wraps the base as
     * prefix + pair[0] + base + pair[1].
     */
    protected function composeForms($word, $annots)
    {
        $result = array();

        foreach($annots as $annot) {
            list($base, $prefix) = $this->getBaseAndPrefix(
                $word,
                $annot['cplen'],
                $annot['plen'],
                $annot['flen']
            );

            $flexias = $this->graminfo->readFlexiaData($annot);

            for($i = 0, $c = count($flexias); $i < $c; $i += 2) {
                $result[$prefix . $flexias[$i] . $base . $flexias[$i + 1]] = 1;
            }
        }

        return array_keys($result);
    }

    /**
     * Builds, per annotation, parallel 'forms' / 'all' lists (one entry per
     * form-ancode combination) plus the resolved 'common' ancode.
     */
    protected function composeFormsWithResolvedAncodes($word, $annots)
    {
        $result = array();

        foreach($annots as $annot) {
            list($base, $prefix) = $this->getBaseAndPrefix(
                $word,
                $annot['cplen'],
                $annot['plen'],
                $annot['flen']
            );

            $words = array();
            $ancodes = array();
            $common_ancode = $annot['common_ancode'];

            $flexias = $this->graminfo->readFlexiaData($annot);
            $all_ancodes = $this->graminfo->readAncodes($annot);

            for($i = 0, $c = count($flexias); $i < $c; $i += 2) {
                $form = $prefix . $flexias[$i] . $base . $flexias[$i + 1];

                // Flexia pair number $i / 2 belongs to ancode group $i / 2.
                $current_ancodes = $all_ancodes[$i / 2];
                foreach($current_ancodes as $ancode) {
                    $words[] = $form;
                    $ancodes[] = $this->ancodes_resolver->resolve($ancode);
                }
            }

            $result[] = array(
                'forms' => $words,
                'common' => $this->ancodes_resolver->resolve($common_ancode),
                'all' => $ancodes,
            );
        }

        return $result;
    }

    /**
     * Builds the flat list of array(form, ancode) pairs over all annotations
     * and records where each annotation's own form landed in that list.
     */
    protected function composeFormsWithAncodes($word, $annots, &$foundFormNo)
    {
        $result = array();

        foreach($annots as $annotIdx => $annot) {
            list($base, $prefix) = $this->getBaseAndPrefix(
                $word,
                $annot['cplen'],
                $annot['plen'],
                $annot['flen']
            );

            $flexias = $this->graminfo->readFlexiaData($annot);
            $ancodes = $this->graminfo->readAncodes($annot);

            $found_form_no = $annot['form_no'];

            $foundFormNo = !is_array($foundFormNo) ? array() : $foundFormNo;

            for($i = 0, $c = count($flexias); $i < $c; $i += 2) {
                $form_no = $i / 2;

                // Kept in its own variable: overwriting $word here would make
                // the next annotation cut its base from a generated form
                // instead of the original word.
                $form = $prefix . $flexias[$i] . $base . $flexias[$i + 1];

                if($found_form_no == $form_no) {
                    $count = count($result);
                    $foundFormNo[$annotIdx]['low'] = $count;
                    $foundFormNo[$annotIdx]['high'] = $count + count($ancodes[$form_no]) - 1;
                }

                foreach($ancodes[$form_no] as $ancode) {
                    $result[] = array($form, $ancode);
                }
            }
        }

        return $result;
    }

    /**
     * Returns decoded annotations: arrays are passed through unchanged,
     * anything else is handed to the configured annot decoder.
     */
    public function decodeAnnot($annotsRaw, $withBase)
    {
        if(is_array($annotsRaw)) {
            return $annotsRaw;
        }

        return $this->annot_decoder->decode($annotsRaw, $withBase);
    }
}
844
845
846
847
848
/**
 * Read-only list of word descriptors, one per decoded annotation of a word.
 *
 * Supports count(), foreach and array-style read access; any attempt to
 * write or unset an element throws.
 */
class phpMorphy_WordDescriptor_Collection implements Countable, IteratorAggregate, ArrayAccess
{
    /**
     * Decoded annotations, or false when none were given.
     *
     * Declared explicitly because creating dynamic properties is deprecated as
     * of PHP 8.2. It stays public since the formerly dynamic property was
     * publicly readable.
     */
    public $annots;

    protected
        $word,
        $descriptors = array(),
        $helper;

    /**
     * @param mixed $word   cast to string
     * @param mixed $annots raw or decoded annotations, or false for an empty collection
     */
    public function __construct($word, $annots, phpMorphy_Morphier_Helper $helper)
    {
        $this->word = (string)$word;
        $this->annots = false === $annots ? false : $helper->decodeAnnot($annots, true);

        $this->helper = $helper;

        if(false !== $this->annots) {
            foreach($this->annots as $annot) {
                $this->descriptors[] = $this->createDescriptor($word, $annot, $helper);
            }
        }
    }

    /** Factory hook for the per-annotation descriptor object. */
    protected function createDescriptor($word, $annot, phpMorphy_Morphier_Helper $helper)
    {
        return new phpMorphy_WordDescriptor($word, $annot, $helper);
    }

    /**
     * @throws phpMorphy_Exception when no descriptor exists at $index
     */
    public function getDescriptor($index)
    {
        if(!$this->offsetExists($index)) {
            throw new phpMorphy_Exception("Invalid index '$index' specified");
        }

        return $this->descriptors[$index];
    }

    /**
     * @param mixed $poses one part of speech or an array of them
     * @return array descriptors for which hasPartOfSpeech($poses) is true
     */
    public function getByPartOfSpeech($poses)
    {
        $result = array();
        settype($poses, 'array');

        foreach($this as $desc) {
            if($desc->hasPartOfSpeech($poses)) {
                $result[] = $desc;
            }
        }

        return $result;
    }

    // The attribute below silences the PHP 8.1+ return-type deprecation for
    // built-in interface methods; older PHP versions read it as a comment.

    #[\ReturnTypeWillChange]
    public function offsetExists($off)
    {
        return isset($this->descriptors[$off]);
    }

    #[\ReturnTypeWillChange]
    public function offsetUnset($off)
    {
        throw new phpMorphy_Exception(__CLASS__ . " is not mutable");
    }

    #[\ReturnTypeWillChange]
    public function offsetSet($off, $value)
    {
        throw new phpMorphy_Exception(__CLASS__ . " is not mutable");
    }

    #[\ReturnTypeWillChange]
    public function offsetGet($off)
    {
        return $this->getDescriptor($off);
    }

    #[\ReturnTypeWillChange]
    public function count()
    {
        return count($this->descriptors);
    }

    #[\ReturnTypeWillChange]
    public function getIterator()
    {
        return new ArrayIterator($this->descriptors);
    }
}
918
/**
 * Value object describing one concrete form of a word: its text, running
 * form number, part of speech and (sorted) grammems.
 */
class phpMorphy_WordForm
{
    protected $word;
    protected $form_no;
    protected $pos_id;
    protected $grammems;

    /**
     * @param mixed $word     cast to string
     * @param mixed $form_no  cast to int
     * @param mixed $pos_id   stored as given
     * @param array $grammems stored sorted
     */
    public function __construct($word, $form_no, $pos_id, $grammems)
    {
        sort($grammems);

        $this->grammems = $grammems;
        $this->pos_id = $pos_id;
        $this->form_no = (int)$form_no;
        $this->word = (string)$word;
    }

    public function getPartOfSpeech()
    {
        return $this->pos_id;
    }

    public function getGrammems()
    {
        return $this->grammems;
    }

    /**
     * Tells whether this form carries every one of the given grammems.
     *
     * @param mixed $grammems single grammem or array of grammems
     * @return bool false for an empty list
     */
    public function hasGrammems($grammems)
    {
        $wanted = (array)$grammems;
        $total = count($wanted);

        if (!$total) {
            return false;
        }

        $present = array_intersect($wanted, $this->grammems);

        return count($present) == $total;
    }

    /**
     * True when both lists have the same size and every element of $a also
     * occurs in $b.
     */
    public static function compareGrammems($a, $b)
    {
        if (count($a) != count($b)) {
            return false;
        }

        return count(array_diff($a, $b)) == 0;
    }

    public function getWord()
    {
        return $this->word;
    }

    public function getFormNo()
    {
        return $this->form_no;
    }
}
963
/**
 * Lazily evaluated view of a single annotation of a word.
 *
 * Acts as a read-only list of word-form objects (count(), foreach,
 * array-style read access) and caches everything it asks the helper for.
 */
class phpMorphy_WordDescriptor implements Countable, ArrayAccess, IteratorAggregate
{
    protected
        $word,
        $annot,
        $helper,
        $cached_forms,
        $cached_base,
        $cached_pseudo_root,
        $all_forms,
        $found_form_no,
        $common_ancode_grammems;

    /**
     * @param mixed $word  cast to string
     * @param array $annot one decoded annotation record
     */
    public function __construct($word, $annot, phpMorphy_Morphier_Helper $helper)
    {
        $this->word = (string)$word;
        // Helper methods take a list of annotations, so the single record is wrapped.
        $this->annot = array($annot);

        $this->helper = $helper;
    }

    /** First pseudo root reported by the helper (cached). */
    public function getPseudoRoot()
    {
        if(!isset($this->cached_pseudo_root)) {
            list($this->cached_pseudo_root) = $this->helper->getPseudoRoot($this->word, $this->annot);
        }

        return $this->cached_pseudo_root;
    }

    /** First base form reported by the helper (cached). */
    public function getBaseForm()
    {
        if(!isset($this->cached_base)) {
            list($this->cached_base) = $this->helper->getBaseForm($this->word, $this->annot);
        }

        return $this->cached_base;
    }

    /** All forms reported by the helper (cached). */
    public function getAllForms()
    {
        if(!isset($this->cached_forms)) {
            $this->cached_forms = $this->helper->getAllForms($this->word, $this->annot);
        }

        return $this->cached_forms;
    }

    /**
     * @throws phpMorphy_Exception when no word form exists at $index
     */
    public function getWordForm($index)
    {
        $this->readAllForms();

        if(!$this->offsetExists($index)) {
            throw new phpMorphy_Exception("Invalid index '$index' given");
        }

        return $this->all_forms[$index];
    }

    /**
     * Builds a phpMorphy_WordForm whose grammems are the common ancode's
     * grammems (looked up once and cached) merged with those of $ancode.
     */
    protected function createWordForm($word, $form_no, $ancode)
    {
        if(!isset($this->common_ancode_grammems)) {
            $common_ancode = $this->annot[0]['common_ancode'];

            $this->common_ancode_grammems = isset($common_ancode) ?
                $this->helper->getGrammems($common_ancode) :
                array();
        }

        list($pos_id, $all_grammems) = $this->helper->getGrammemsAndPartOfSpeech($ancode);

        return new phpMorphy_WordForm($word, $form_no, $pos_id, array_merge($this->common_ancode_grammems, $all_grammems));
    }

    /**
     * Loads (once) every word form and the low/high positions of the form
     * that the annotation itself refers to.
     *
     * @return array list of word-form objects
     */
    protected function readAllForms()
    {
        if(!isset($this->all_forms)) {
            $result = array();

            $form_no = 0;

            $found_form_no = array();
            foreach($this->helper->getAllFormsWithAncodes($this->word, $this->annot, $found_form_no) as $form) {
                // $form is array(form text, ancode).
                $word = $form[0];

                $result[] = $this->createWordForm($word, $form_no, $form[1]);

                $form_no++;
            }

            // Only one annotation was passed in, so its entry has index 0.
            $this->found_form_no = $found_form_no[0];
            $this->all_forms = $result;
        }

        return $this->all_forms;
    }

    protected function getFoundFormNoLow()
    {
        $this->readAllForms();

        return $this->found_form_no['low'];
    }

    protected function getFoundFormNoHigh()
    {
        $this->readAllForms();

        return $this->found_form_no['high'];
    }

    /**
     * @return array word forms in the inclusive low..high range of the found form
     */
    public function getFoundWordForm()
    {
        $result = array();
        for($i = $this->getFoundFormNoLow(), $c = $this->getFoundFormNoHigh() + 1; $i < $c; $i++) {
            $result[] = $this->getWordForm($i);
        }

        return $result;
    }

    /**
     * @param mixed $grammems single grammem or array of grammems
     * @return bool true when at least one word form has all of them
     */
    public function hasGrammems($grammems)
    {
        settype($grammems, 'array');

        foreach($this as $wf) {
            if($wf->hasGrammems($grammems)) {
                return true;
            }
        }

        return false;
    }

    /**
     * @param mixed $grammems single grammem or array of grammems
     * @return array word forms that have all of them
     */
    public function getWordFormsByGrammems($grammems)
    {
        settype($grammems, 'array');
        $result = array();

        foreach($this as $wf) {
            if($wf->hasGrammems($grammems)) {
                $result[] = $wf;
            }
        }

        return $result;
    }

    /**
     * @param mixed $poses single part of speech or array of them (strict comparison)
     * @return bool true when at least one word form has one of them
     */
    public function hasPartOfSpeech($poses)
    {
        settype($poses, 'array');

        foreach($this as $wf) {
            if(in_array($wf->getPartOfSpeech(), $poses, true)) {
                return true;
            }
        }

        return false;
    }

    /**
     * @param mixed $poses single part of speech or array of them (strict comparison)
     * @return array word forms whose part of speech is one of them
     */
    public function getWordFormsByPartOfSpeech($poses)
    {
        settype($poses, 'array');
        $result = array();

        foreach($this as $wf) {
            if(in_array($wf->getPartOfSpeech(), $poses, true)) {
                $result[] = $wf;
            }
        }

        return $result;
    }

    // The attribute below silences the PHP 8.1+ return-type deprecation for
    // built-in interface methods; older PHP versions read it as a comment.

    #[\ReturnTypeWillChange]
    public function count()
    {
        return count($this->readAllForms());
    }

    #[\ReturnTypeWillChange]
    public function offsetExists($off)
    {
        $this->readAllForms();

        return isset($this->all_forms[$off]);
    }

    #[\ReturnTypeWillChange]
    public function offsetSet($off, $value)
    {
        throw new phpMorphy_Exception(__CLASS__ . " is not mutable");
    }

    #[\ReturnTypeWillChange]
    public function offsetUnset($off)
    {
        throw new phpMorphy_Exception(__CLASS__ . " is not mutable");
    }

    #[\ReturnTypeWillChange]
    public function offsetGet($off)
    {
        return $this->getWordForm($off);
    }

    #[\ReturnTypeWillChange]
    public function getIterator()
    {
        $this->readAllForms();

        return new ArrayIterator($this->all_forms);
    }
}
1154
1155
1156
1157
/**
 * Looks up the annotation data of a word and knows how to decode it.
 */
interface phpMorphy_Morphier_Finder_Interface
{
    /** Finds the annotations of $word. */
    public function findWord($word);

    /** Decodes raw annotation data, optionally including base data. */
    public function decodeAnnot($raw, $withBase);

    /** Returns the decoder used by decodeAnnot(). */
    public function getAnnotDecoder();
}
1163
/**
 * Finder skeleton that remembers the most recent lookup.
 *
 * Asking for the same word twice in a row returns the remembered result
 * without calling doFindWord() again.
 */
abstract class phpMorphy_Morphier_Finder_Base implements phpMorphy_Morphier_Finder_Interface
{
    protected $annot_decoder;
    protected $prev_word;
    protected $prev_result = false;

    public function __construct(phpMorphy_AnnotDecoder_Interface $annotDecoder)
    {
        $this->annot_decoder = $annotDecoder;
    }

    /**
     * Returns the lookup result for $word, reusing the previous result when
     * $word is identical (===) to the previously looked-up word.
     */
    public function findWord($word)
    {
        if ($word !== $this->prev_word) {
            $found = $this->doFindWord($word);

            $this->prev_word = $word;
            $this->prev_result = $found;
        }

        return $this->prev_result;
    }

    public function getAnnotDecoder()
    {
        return $this->annot_decoder;
    }

    /** Decodes raw annotation data with the configured decoder. */
    public function decodeAnnot($raw, $withBase)
    {
        $decoder = $this->annot_decoder;

        return $decoder->decode($raw, $withBase);
    }

    /** Performs the actual, uncached lookup. */
    abstract protected function doFindWord($word);
}
1197
/**
 * Finder that walks the automaton from its root transition with the word.
 */
class phpMorphy_Morphier_Finder_Common extends phpMorphy_Morphier_Finder_Base
{
    protected $fsa;
    protected $root;

    public function __construct(phpMorphy_Fsa_Interface $fsa, phpMorphy_AnnotDecoder_Interface $annotDecoder)
    {
        parent::__construct($annotDecoder);

        $this->fsa = $fsa;
        // The root transition is fetched once and reused for every lookup.
        $this->root = $this->fsa->getRootTrans();
    }

    public function getFsa()
    {
        return $this->fsa;
    }

    /**
     * @return mixed the walk's 'annot' value, or false when the walk reports
     *               no result or a null annot
     */
    protected function doFindWord($word)
    {
        $walk = $this->fsa->walk($this->root, $word);

        if ($walk['result'] && null !== $walk['annot']) {
            return $walk['annot'];
        }

        return false;
    }
}
1224
/**
 * Finder that predicts by suffix: it repeatedly strips the first character
 * of the word and looks the remainder up until something is found.
 */
class phpMorphy_Morphier_Finder_Predict_Suffix extends phpMorphy_Morphier_Finder_Common
{
    protected $min_suf_len;
    protected $unicode;

    /**
     * @param mixed $encoding            passed to phpMorphy_UnicodeHelper::create()
     * @param int   $minimalSuffixLength cast to int; bounds how far the word is shortened
     */
    public function __construct(phpMorphy_Fsa_Interface $fsa, phpMorphy_AnnotDecoder_Interface $annotDecoder, $encoding, $minimalSuffixLength = 4)
    {
        parent::__construct($fsa, $annotDecoder);

        $this->min_suf_len = (int)$minimalSuffixLength;
        $this->unicode = phpMorphy_UnicodeHelper::create($encoding);
    }

    /**
     * @return mixed annotations of the first shortened word that is found,
     *               false for an empty word or when nothing is found
     */
    protected function doFindWord($word)
    {
        $length = $this->unicode->strlen($word);

        if (!$length) {
            return false;
        }

        $stop = $length - $this->min_suf_len;

        for ($dropped = 1; $dropped < $stop; $dropped++) {
            // Drop the leading character (its byte size comes from the unicode helper).
            $word = $GLOBALS['__phpmorphy_substr']($word, $this->unicode->firstCharSize($word));

            $found = parent::doFindWord($word);

            if (false !== $found) {
                return $found;
            }
        }

        return false;
    }

    /**
     * Sets 'cplen' of every annotation (indices 0..count-1) to $len.
     */
    protected function fixAnnots($annots, $len)
    {
        $total = count($annots);

        for ($idx = 0; $idx < $total; $idx++) {
            $annots[$idx]['cplen'] = $len;
        }

        return $annots;
    }
}
1276
/**
 * Words collector that keeps, per part-of-speech id, the decoded annotation
 * with the highest 'freq'.
 */
class phpMorphy_Morphier_PredictCollector extends phpMorphy_Fsa_WordsCollector
{
    /** Maps a pos_id to the index of its annotation in $this->items. */
    protected $used_poses = array();
    protected $annot_decoder;
    /** Number of collect() calls that were accepted since the last clear(). */
    protected $collected = 0;

    public function __construct($limit, phpMorphy_AnnotDecoder_Interface $annotDecoder)
    {
        parent::__construct($limit);

        $this->annot_decoder = $annotDecoder;
    }

    /**
     * Merges the annotations of one collected entry into the items.
     *
     * @return bool false once more than $this->limit entries were collected, true otherwise
     */
    public function collect($path, $annotRaw)
    {
        if ($this->collected > $this->limit) {
            return false;
        }

        $decoded = $this->decodeAnnot($annotRaw);
        $total = count($decoded);

        for ($idx = 0; $idx < $total; $idx++) {
            $candidate = $decoded[$idx];
            $candidate['cplen'] = $candidate['plen'] = 0;

            $pos = $candidate['pos_id'];

            if (!isset($this->used_poses[$pos])) {
                // First annotation seen for this part of speech.
                $this->used_poses[$pos] = count($this->items);
                $this->items[] = $candidate;
                continue;
            }

            // Keep only the more frequent annotation for a known part of speech.
            $slot = $this->used_poses[$pos];

            if ($candidate['freq'] > $this->items[$slot]['freq']) {
                $this->items[$slot] = $candidate;
            }
        }

        $this->collected++;

        return true;
    }

    /** Resets the parent state together with the counters kept here. */
    public function clear()
    {
        parent::clear();

        $this->used_poses = array();
        $this->collected = 0;
    }

    /** Decodes raw annotation data including base prefix/suffix data. */
    public function decodeAnnot($annotRaw)
    {
        $decoder = $this->annot_decoder;

        return $decoder->decode($annotRaw, true);
    }
}
1329
/**
 * Finder that walks the FSA with the reversed word and, when the walk does not
 * end on an annotation, falls back to the annotations reachable from the last
 * transition reached (only if enough characters were matched).
 */
class phpMorphy_Morphier_Finder_Predict_Databse extends phpMorphy_Morphier_Finder_Common {
    /** @var phpMorphy_Morphier_PredictCollector */
    protected $collector;

    /** @var mixed helper returned by phpMorphy_UnicodeHelper::create() */
    protected $unicode;

    /** @var phpMorphy_GramInfo_Interace */
    protected $graminfo;

    /** @var int minimum matched length required before candidates are collected */
    protected $min_postfix_match;

    /**
     * @param phpMorphy_Fsa_Interface          $fsa
     * @param phpMorphy_AnnotDecoder_Interface $annotDecoder
     * @param mixed                            $encoding        passed to phpMorphy_UnicodeHelper::create()
     * @param phpMorphy_GramInfo_Interace      $graminfo        used to read flexia data in fixAnnots()
     * @param int                              $minPostfixMatch see $min_postfix_match
     * @param int                              $collectLimit    limit handed to the collector
     */
    public function __construct(
        phpMorphy_Fsa_Interface $fsa,
        phpMorphy_AnnotDecoder_Interface $annotDecoder,
        $encoding,
        phpMorphy_GramInfo_Interace $graminfo,
        $minPostfixMatch = 2,
        $collectLimit = 32
    ) {
        parent::__construct($fsa, $annotDecoder);

        $this->graminfo = $graminfo;
        $this->min_postfix_match = $minPostfixMatch;

        // The second argument is not declared by createCollector() below and is
        // therefore ignored by it; it is still passed exactly as before.
        $this->collector = $this->createCollector($collectLimit, $this->getAnnotDecoder());

        $this->unicode = phpMorphy_UnicodeHelper::create($encoding);
    }

    /**
     * NOTE(review): phpmorphy_annot_decoder_new() is not defined in the visible
     * part of this file — confirm it exists before relying on this method.
     */
    protected function createAnnotDecoder() {
        return phpmorphy_annot_decoder_new('predict');
    }

    /**
     * Looks the word up through its reversed form.
     *
     * @param string $word
     * @return array|false annotations accepted by fixAnnots(), or false
     */
    protected function doFindWord($word) {
        $reversed = $this->unicode->strrev($word);
        $walk = $this->fsa->walk($this->root, $reversed);

        if($walk['result'] && null !== $walk['annot']) {
            $annots = $walk['annot'];
        } else {
            $substr = $GLOBALS['__phpmorphy_substr'];

            // Length of the part of the reversed word that was actually walked.
            $walked_part = $substr($reversed, 0, $walk['walked']);
            $match_len = $this->unicode->strlen($this->unicode->fixTrailing($walked_part));

            $annots = $this->determineAnnots($walk['last_trans'], $match_len);

            if(null === $annots) {
                return false;
            }
        }

        // Anything that is not already an array is treated as a raw annotation.
        if(!is_array($annots)) {
            $annots = $this->collector->decodeAnnot($annots);
        }

        return $this->fixAnnots($word, $annots);
    }

    /**
     * Returns the annotation stored at $trans, or — when there is none and the
     * match is long enough — the items gathered by the collector from $trans.
     *
     * @param mixed $trans    FSA transition to inspect
     * @param int   $matchLen number of characters matched so far
     * @return mixed
     */
    protected function determineAnnots($trans, $matchLen) {
        $annots = $this->fsa->getAnnot($trans);

        if(null != $annots || $matchLen < $this->min_postfix_match) {
            return $annots;
        }

        $this->collector->clear();
        $this->fsa->collect($trans, $this->collector->getCallback());

        return $this->collector->getItems();
    }

    /**
     * Keeps only the annotations whose flexia prefix and suffix (for the
     * annotation's 'form_no') both match the given word.
     *
     * @param string $word
     * @param array  $annots annotations, accessed by the integer indexes 0..count-1
     * @return array|false the matching annotations, or false when none match
     */
    protected function fixAnnots($word, $annots) {
        $strlen = $GLOBALS['__phpmorphy_strlen'];
        $substr = $GLOBALS['__phpmorphy_substr'];

        $matched = array();
        $total = count($annots);

        for($n = 0; $n < $total; ++$n) {
            $annot = $annots[$n];
            $annot['cplen'] = $annot['plen'] = 0;

            $flexias = $this->graminfo->readFlexiaData($annot, false);

            // Flexias are laid out as consecutive (prefix, suffix) pairs.
            $pair_offset = $annot['form_no'] * 2;
            $prefix = $flexias[$pair_offset];
            $suffix = $flexias[$pair_offset + 1];

            $plen = $strlen($prefix);
            $slen = $strlen($suffix);

            // An empty prefix/suffix always matches.
            if($plen && $substr($word, 0, $plen) !== $prefix) {
                continue;
            }

            if($slen && $substr($word, -$slen) !== $suffix) {
                continue;
            }

            $matched[] = $annot;
        }

        return count($matched) ? $matched : false;
    }

    /**
     * @param mixed $limit limit handed to the collector
     * @return phpMorphy_Morphier_PredictCollector
     */
    protected function createCollector($limit) {
        return new phpMorphy_Morphier_PredictCollector($limit, $this->getAnnotDecoder());
    }
}
1427
1428
1429
1430
/**
 * Base morphier: every query first asks the finder for the word's annotations
 * and, if any are found, delegates the actual work to the helper.
 *
 * All query methods return false when the finder does not know the word.
 */
abstract class phpMorphy_Morphier_Base implements phpMorphy_Morphier_Interface {
    /** @var phpMorphy_Morphier_Finder_Interface */
    protected $finder;

    /** @var phpMorphy_Morphier_Helper private clone bound to the finder's annot decoder */
    protected $helper;

    /**
     * @param phpMorphy_Morphier_Finder_Interface $finder
     * @param phpMorphy_Morphier_Helper           $helper cloned, so the caller's
     *        instance is not affected by the setAnnotDecoder() call made here
     */
    public function __construct(phpMorphy_Morphier_Finder_Interface $finder, phpMorphy_Morphier_Helper $helper) {
        $this->finder = $finder;

        $this->helper = clone $helper;
        $this->helper->setAnnotDecoder($finder->getAnnotDecoder());
    }

    /**
     * @return phpMorphy_Morphier_Finder_Interface
     */
    public function getFinder() {
        return $this->finder;
    }

    /**
     * @return phpMorphy_Morphier_Helper
     */
    public function getHelper() {
        return $this->helper;
    }

    /**
     * @param string $word
     * @return mixed decoded annotations, or false when the word is not found
     */
    public function getAnnot($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->decodeAnnot($annots, true);
    }

    /**
     * @param string $word
     * @return mixed helper result, or false when the word is not found
     */
    public function getWordDescriptor($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getWordDescriptor($word, $annots);
    }

    /**
     * @param string $word
     * @return mixed helper result, or false when the word is not found
     */
    public function getAllFormsWithAncodes($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getAllFormsWithResolvedAncodes($word, $annots);
    }

    /**
     * @param string $word
     * @return mixed helper result, or false when the word is not found
     */
    public function getPartOfSpeech($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getPartOfSpeech($word, $annots);
    }

    /**
     * @param string $word
     * @return mixed helper result, or false when the word is not found
     */
    public function getBaseForm($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getBaseForm($word, $annots);
    }

    /**
     * @param string $word
     * @return mixed helper result, or false when the word is not found
     */
    public function getPseudoRoot($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getPseudoRoot($word, $annots);
    }

    /**
     * @param string $word
     * @return mixed helper result, or false when the word is not found
     */
    public function getAllForms($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getAllForms($word, $annots);
    }

    /**
     * @param string $word
     * @return mixed helper result, or false when the word is not found
     */
    public function getAncode($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getAncode($annots);
    }

    /**
     * @param string $word
     * @return mixed helper result, or false when the word is not found
     */
    public function getGrammarInfo($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getGrammarInfo($annots);
    }

    /**
     * @param string $word
     * @return mixed helper result, or false when the word is not found
     */
    public function getGrammarInfoMergeForms($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getGrammarInfoMergeForms($annots);
    }

    /**
     * Casts the word to the form selected by part of speech and grammems.
     *
     * @param string $word
     * @param mixed  $partOfSpeech
     * @param mixed  $grammems
     * @param bool   $returnOnlyWord
     * @param mixed  $callback
     * @return mixed helper result, or false when the word is not found
     */
    public function castFormByGramInfo($word, $partOfSpeech, $grammems, $returnOnlyWord = false, $callback = null) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        // Previously only ($word, $annots) reached the helper, so the selection
        // criteria and options given by the caller were silently discarded.
        // They are now forwarded in the same layout castFormByPattern() uses.
        // NOTE(review): assumes the helper takes them in this order after
        // ($word, $annots) — confirm against phpMorphy_Morphier_Helper.
        return $this->helper->castFormByGramInfo(
            $word, $annots,
            $partOfSpeech, $grammems,
            $returnOnlyWord,
            $callback
        );
    }

    /**
     * Casts the word using another word as the pattern.
     *
     * @param string $word
     * @param string $patternWord
     * @param bool   $returnOnlyWord
     * @param mixed  $callback
     * @return mixed helper result, or false when either word is not found
     */
    public function castFormByPattern($word, $patternWord, $returnOnlyWord = false, $callback = null) {
        if(false === ($orig_annots = $this->finder->findWord($word))) {
            return false;
        }

        if(false === ($pattern_annots = $this->finder->findWord($patternWord))) {
            return false;
        }

        return $this->helper->castFormByPattern(
            $word, $orig_annots,
            $patternWord, $pattern_annots,
            $returnOnlyWord,
            $callback
        );
    }
}
1568
/**
 * Morphier backed by phpMorphy_Morphier_Finder_Common and the factory's
 * "common" annotation decoder.
 */
class phpMorphy_Morphier_Common extends phpMorphy_Morphier_Base {
    /**
     * @param phpMorphy_Fsa_Interface   $fsa
     * @param phpMorphy_Morphier_Helper $helper
     */
    public function __construct(phpMorphy_Fsa_Interface $fsa, phpMorphy_Morphier_Helper $helper) {
        $decoder = $this->createAnnotDecoder($helper);
        $finder = new phpMorphy_Morphier_Finder_Common($fsa, $decoder);

        parent::__construct($finder, $helper);
    }

    /**
     * Builds the decoder from the "ends" value exposed by the helper's gram info.
     *
     * @param phpMorphy_Morphier_Helper $helper
     * @return mixed the factory's common decoder
     */
    protected function createAnnotDecoder(phpMorphy_Morphier_Helper $helper) {
        $ends = $helper->getGramInfo()->getEnds();
        $factory = phpMorphy_AnnotDecoder_Factory::create($ends);

        return $factory->getCommonDecoder();
    }
}
1584
/**
 * Morphier backed by phpMorphy_Morphier_Finder_Predict_Suffix and the factory's
 * "common" annotation decoder.
 */
class phpMorphy_Morphier_Predict_Suffix extends phpMorphy_Morphier_Base {
    /**
     * @param phpMorphy_Fsa_Interface   $fsa
     * @param phpMorphy_Morphier_Helper $helper
     */
    public function __construct(phpMorphy_Fsa_Interface $fsa, phpMorphy_Morphier_Helper $helper) {
        $decoder = $this->createAnnotDecoder($helper);
        $encoding = $helper->getGramInfo()->getEncoding();

        // The trailing 4 is the finder's fourth constructor argument, passed as-is.
        $finder = new phpMorphy_Morphier_Finder_Predict_Suffix($fsa, $decoder, $encoding, 4);

        parent::__construct($finder, $helper);
    }

    /**
     * Builds the decoder from the "ends" value exposed by the helper's gram info.
     *
     * @param phpMorphy_Morphier_Helper $helper
     * @return mixed the factory's common decoder
     */
    protected function createAnnotDecoder(phpMorphy_Morphier_Helper $helper) {
        $ends = $helper->getGramInfo()->getEnds();
        $factory = phpMorphy_AnnotDecoder_Factory::create($ends);

        return $factory->getCommonDecoder();
    }
}
1602
/**
 * Morphier backed by phpMorphy_Morphier_Finder_Predict_Databse and the
 * factory's "predict" annotation decoder.
 */
class phpMorphy_Morphier_Predict_Database extends phpMorphy_Morphier_Base {
    /**
     * @param phpMorphy_Fsa_Interface   $fsa
     * @param phpMorphy_Morphier_Helper $helper
     */
    public function __construct(phpMorphy_Fsa_Interface $fsa, phpMorphy_Morphier_Helper $helper) {
        $decoder = $this->createAnnotDecoder($helper);
        $encoding = $helper->getGramInfo()->getEncoding();
        $graminfo = $helper->getGramInfo();

        // 2 = minimum postfix match, 32 = collect limit (the finder's last two
        // constructor parameters).
        $finder = new phpMorphy_Morphier_Finder_Predict_Databse(
            $fsa,
            $decoder,
            $encoding,
            $graminfo,
            2,
            32
        );

        parent::__construct($finder, $helper);
    }

    /**
     * Builds the decoder from the "ends" value exposed by the helper's gram info.
     *
     * @param phpMorphy_Morphier_Helper $helper
     * @return mixed the factory's predict decoder
     */
    protected function createAnnotDecoder(phpMorphy_Morphier_Helper $helper) {
        $ends = $helper->getGramInfo()->getEnds();
        $factory = phpMorphy_AnnotDecoder_Factory::create($ends);

        return $factory->getPredictDecoder();
    }
}
1622
/**
 * Batch morphier: every public query takes an ARRAY of words and returns an
 * array keyed by word.
 *
 * The words are first folded into a patricia trie (buildPatriciaTrie()), then
 * the trie is traversed while walking the FSA (findWord()), so a prefix shared
 * by several words is walked only once. Words for which no annotation was
 * reached are recorded and available through getNotFoundWords().
 */
class phpMorphy_Morphier_Bulk implements phpMorphy_Morphier_Interface {
    protected
        $fsa,
        $root_trans,
        $helper,
        $notfound = array(),
        $graminfo;

    /**
     * @param phpMorphy_Fsa_Interface   $fsa
     * @param phpMorphy_Morphier_Helper $helper cloned, so the caller's instance
     *        is not affected by the setAnnotDecoder() call made here
     */
    public function __construct(phpMorphy_Fsa_Interface $fsa, phpMorphy_Morphier_Helper $helper) {
        $this->fsa = $fsa;
        $this->root_trans = $fsa->getRootTrans();

        $this->helper = clone $helper;
        $this->helper->setAnnotDecoder($this->createAnnotDecoder($helper));

        $this->graminfo = $helper->getGramInfo();
    }

    /**
     * @return phpMorphy_Fsa_Interface
     */
    public function getFsa() {
        return $this->fsa;
    }

    /**
     * @return phpMorphy_Morphier_Helper
     */
    public function getHelper() {
        return $this->helper;
    }

    /**
     * @return mixed the gram info object taken from the helper at construction
     */
    public function getGraminfo() {
        return $this->graminfo;
    }

    /**
     * @return array words of the most recent lookup that had no annotation
     */
    public function getNotFoundWords() {
        return $this->notfound;
    }

    /**
     * @param phpMorphy_Morphier_Helper $helper
     * @return phpMorphy_AnnotDecoder_Common
     */
    protected function createAnnotDecoder(phpMorphy_Morphier_Helper $helper) {
        return new phpMorphy_AnnotDecoder_Common($helper->getGramInfo()->getEnds());
    }

    /**
     * @param array $word words to look up (an array, despite the parameter name)
     * @return array word => list of decoded annotations
     */
    public function getAnnot($word) {
        $result = array();

        foreach($this->findWord($word) as $annot_raw => $found_words) {
            $annot = $this->helper->decodeAnnot($annot_raw, true);

            foreach($found_words as $found_word) {
                $result[$found_word][] = $annot;
            }
        }

        return $result;
    }

    /**
     * @param array $words
     * @return array word => list of base forms
     */
    public function getBaseForm($words) {
        $annots = $this->findWord($words);

        return $this->composeForms($annots, true, false, false);
    }

    /**
     * @param array $words
     * @return array word => list of all forms
     */
    public function getAllForms($words) {
        $annots = $this->findWord($words);

        return $this->composeForms($annots, false, false, false);
    }

    /**
     * @param array $words
     * @return array word => list of pseudo roots
     */
    public function getPseudoRoot($words) {
        $annots = $this->findWord($words);

        return $this->composeForms($annots, false, true, false);
    }

    /**
     * @param array $words
     * @return array word => list of part-of-speech ids
     */
    public function getPartOfSpeech($words) {
        $annots = $this->findWord($words);

        return $this->composeForms($annots, false, false, true);
    }

    /**
     * Looks the words up and applies one helper method to every raw annotation.
     *
     * @param array  $words
     * @param string $method       name of the helper method to call
     * @param bool   $callWithWord true: call $method($word, $annot) per word;
     *                             false: call $method($annot) once per annotation
     *                             and share the result between its words
     * @return array word => helper result
     */
    protected function processAnnotsWithHelper($words, $method, $callWithWord = false) {
        $result = array();

        foreach($this->findWord($words) as $annot_raw => $found_words) {
            // Empty annotations carry nothing to process.
            if($GLOBALS['__phpmorphy_strlen']($annot_raw) == 0) continue;

            if($callWithWord) {
                foreach($found_words as $found_word) {
                    $result[$found_word] = $this->helper->$method($found_word, $annot_raw);
                }
            } else {
                $result_for_annot = $this->helper->$method($annot_raw);

                foreach($found_words as $found_word) {
                    $result[$found_word] = $result_for_annot;
                }
            }
        }

        return $result;
    }

    /**
     * @param array $words
     * @return array word => helper getAncode() result
     */
    public function getAncode($words) {
        return $this->processAnnotsWithHelper($words, 'getAncode');
    }

    /**
     * @param array $words
     * @return array word => helper getGrammarInfoMergeForms() result
     */
    public function getGrammarInfoMergeForms($words) {
        return $this->processAnnotsWithHelper($words, 'getGrammarInfoMergeForms');
    }

    /**
     * @param array $words
     * @return array word => helper getGrammarInfo() result
     */
    public function getGrammarInfo($words) {
        return $this->processAnnotsWithHelper($words, 'getGrammarInfo');
    }

    /**
     * @param array $words
     * @return array word => helper getAllFormsWithResolvedAncodes() result
     */
    public function getAllFormsWithAncodes($words) {
        return $this->processAnnotsWithHelper($words, 'getAllFormsWithResolvedAncodes', true);
    }

    /**
     * @param array $word words to look up (an array, despite the parameter name)
     * @return array word => helper getWordDescriptor() result
     */
    public function getWordDescriptor($word) {
        // The parameter is $word; the previous code passed an undefined $words,
        // which always ended in the "Words must be array" exception.
        return $this->processAnnotsWithHelper($word, 'getWordDescriptor', true);
    }

    /**
     * Walks the FSA for a whole batch of words.
     *
     * Resets and refills the not-found list as a side effect.
     *
     * @param array $words
     * @return array raw annotation => list of words that carry it
     * @throws phpMorphy_Exception when $words is not an array
     */
    protected function findWord($words) {
        $this->notfound = array();

        list($labels, $finals, $dests) = $this->buildPatriciaTrie($words);

        $annots = array();

        // Flat stack of triples: trie node id, path accumulated above the node,
        // FSA transition reached for that path (false once the FSA walk failed).
        $stack = array(0, '', $this->root_trans);
        $stack_idx = 0;

        $fsa = $this->fsa;

        while($stack_idx >= 0) {
            $n = $stack[$stack_idx];
            $path = $stack[$stack_idx + 1] . $labels[$n];
            $trans = $stack[$stack_idx + 2];
            $stack_idx -= 3;

            // $finals is a string of '0'/'1' flags indexed by node id.
            $is_final = $finals[$n] > 0;

            $result = false;
            // Node 0 is the trie root (empty label): nothing to walk for it.
            if(false !== $trans && $n > 0) {
                $label = $labels[$n];

                $result = $fsa->walk($trans, $label, $is_final);

                if($GLOBALS['__phpmorphy_strlen']($label) == $result['walked']) {
                    $trans = $result['word_trans'];
                } else {
                    // Label only partially matched: the whole subtree is unknown.
                    $trans = false;
                }
            }

            if($is_final) {
                if(false !== $trans && isset($result['annot'])) {
                    $annots[$result['annot']][] = $path;
                } else {
                    $this->notfound[] = $path;
                }
            }

            if(false !== $dests[$n]) {
                foreach($dests[$n] as $dest) {
                    $stack_idx += 3;
                    $stack[$stack_idx] = $dest;
                    $stack[$stack_idx + 1] = $path;
                    $stack[$stack_idx + 2] = $trans;
                }
            }
        }

        return $annots;
    }

    /**
     * Turns raw annotations into word forms (or part-of-speech ids).
     *
     * The mode flags are checked in this order: $pseudoRoot, $onlyBase,
     * $partOfSpeech; with all three false every form is produced.
     *
     * @param array $annotsRaw    raw annotation => list of words (see findWord())
     * @param bool  $onlyBase     produce only the base form
     * @param bool  $pseudoRoot   produce only the word stripped of prefix/flexia
     * @param bool  $partOfSpeech produce part-of-speech ids instead of forms
     * @return array word => list of distinct values
     */
    protected function composeForms($annotsRaw, $onlyBase, $pseudoRoot, $partOfSpeech) {
        $result = array();

        foreach($annotsRaw as $annot_raw => $words) {
            if($GLOBALS['__phpmorphy_strlen']($annot_raw) == 0) continue;

            foreach($this->helper->decodeAnnot($annot_raw, $onlyBase) as $annot) {
                if(!($onlyBase || $pseudoRoot)) {
                    $flexias = $this->graminfo->readFlexiaData($annot);
                }

                $cplen = $annot['cplen'];
                $plen = $annot['plen'];
                $flen = $annot['flen'];

                if($partOfSpeech) {
                    $pos_id = $this->helper->extractPartOfSpeech($annot);
                }

                foreach($words as $word) {
                    // Cut $cplen + $plen characters from the front and $flen from the end.
                    if($flen) {
                        $base = $GLOBALS['__phpmorphy_substr']($word, $cplen + $plen, -$flen);
                    } else {
                        if($cplen || $plen) {
                            $base = $GLOBALS['__phpmorphy_substr']($word, $cplen + $plen);
                        } else {
                            $base = $word;
                        }
                    }

                    $prefix = $cplen ? $GLOBALS['__phpmorphy_substr']($word, 0, $cplen) : '';

                    // Values are stored as array keys so duplicates collapse.
                    if($pseudoRoot) {
                        $result[$word][$base] = 1;
                    } else if($onlyBase) {
                        $form = $prefix . $annot['base_prefix'] . $base . $annot['base_suffix'];

                        $result[$word][$form] = 1;
                    } else if($partOfSpeech) {
                        $result[$word][$pos_id] = 1;
                    } else {
                        // Flexias are consecutive (prefix, suffix) pairs.
                        for($i = 0, $c = count($flexias); $i < $c; $i += 2) {
                            $form = $prefix . $flexias[$i] . $base . $flexias[$i + 1];
                            $result[$word][$form] = 1;
                        }
                    }
                }
            }
        }

        // Convert each per-word set back into a plain list.
        foreach(array_keys($result) as $key) {
            $result[$key] = array_keys($result[$key]);
        }

        return $result;
    }

    /**
     * Builds a patricia (compressed prefix) trie over the given words.
     *
     * Nodes are identified by integer ids; node 0 is the root with an empty
     * label. The trie is returned as three parallel structures indexed by id.
     *
     * @param array $words
     * @return array array(labels, finals, dests) where labels is the list of edge
     *               labels, finals is a STRING of '0'/'1' flags (one per node)
     *               and dests holds, per node, the list of child ids or false
     * @throws phpMorphy_Exception when $words is not an array, or when the
     *                             internal stack is exhausted while backtracking
     */
    protected function buildPatriciaTrie($words) {
        if(!is_array($words)) {
            throw new phpMorphy_Exception("Words must be array");
        }

        // Byte-wise string order is required: the common-prefix logic below
        // relies on words sharing a prefix being adjacent. The default flags
        // would compare numeric strings ("9" vs "10") as numbers instead.
        sort($words, SORT_STRING);

        $stack = array();
        $prev_word = '';
        $prev_word_len = 0;
        $prev_lcp = 0;

        $state_labels = array();
        $state_dests = array();

        // Root node.
        $state_labels[] = '';
        $state_finals = '0';
        $state_dests[] = array();

        $node = 0;

        foreach($words as $word) {
            // Skip duplicates (and an empty word, which equals the initial '').
            // Strict comparison: with == distinct numeric strings such as
            // "007" and "7" would be treated as the same word and dropped.
            if($word === $prev_word) {
                continue;
            }

            $word_len = $GLOBALS['__phpmorphy_strlen']($word);

            // Length of the common prefix with the previous word.
            for($lcp = 0, $c = min($prev_word_len, $word_len); $lcp < $c && $word[$lcp] === $prev_word[$lcp]; $lcp++);

            if($lcp == 0) {
                // Nothing shared with the previous word: new child of the root.
                $stack = array();

                $new_state_id = count($state_labels);

                $state_labels[] = $word;
                $state_finals .= '1';
                $state_dests[] = false;

                $state_dests[0][] = $new_state_id;

                $node = $new_state_id;
            } else {
                $need_split = true;
                $trim_size = 0;

                if($lcp == $prev_lcp) {
                    // Same branching point as before: sibling of the previous word.
                    $need_split = false;
                    $node = $stack[count($stack) - 1];
                } elseif($lcp > $prev_lcp) {
                    if($lcp == $prev_word_len) {
                        // Previous word is a prefix of this one: plain child.
                        $need_split = false;
                    } else {
                        $need_split = true;
                        $trim_size = $lcp - $prev_lcp;
                    }

                    $stack[] = $node;
                } else {
                    // Shorter common prefix: climb back up until the node that
                    // covers position $lcp is reached.
                    $trim_size = $GLOBALS['__phpmorphy_strlen']($prev_word) - $lcp;

                    for($stack_size = count($stack) - 1; ;--$stack_size) {
                        $trim_size -= $GLOBALS['__phpmorphy_strlen']($state_labels[$node]);

                        if($trim_size <= 0) {
                            break;
                        }

                        if(count($stack) < 1) {
                            throw new phpMorphy_Exception('Infinite loop posible');
                        }

                        $node = array_pop($stack);
                    }

                    // Negative remainder: the branching point lies inside the
                    // node's label, so the node has to be split there.
                    $need_split = $trim_size < 0;
                    $trim_size = abs($trim_size);

                    if($need_split) {
                        $stack[] = $node;
                    } else {
                        $node = $stack[$stack_size];
                    }
                }

                if($need_split) {
                    $node_key = $state_labels[$node];

                    $new_node_id_1 = count($state_labels);
                    $new_node_id_2 = $new_node_id_1 + 1;

                    // New node 1 takes over the tail of the split label together
                    // with the old node's final flag and children.
                    $state_labels[] = $GLOBALS['__phpmorphy_substr']($node_key, $trim_size);
                    $state_finals .= $state_finals[$node];
                    $state_dests[] = $state_dests[$node];

                    // The old node keeps the head of the label and becomes an
                    // inner, non-final node.
                    $state_labels[$node] = $GLOBALS['__phpmorphy_substr']($node_key, 0, $trim_size);
                    $state_finals[$node] = '0';
                    $state_dests[$node] = array($new_node_id_1);

                    // New node 2 holds the remainder of the current word.
                    $state_labels[] = $GLOBALS['__phpmorphy_substr']($word, $lcp);
                    $state_finals .= '1';
                    $state_dests[] = false;

                    $state_dests[$node][] = $new_node_id_2;

                    $node = $new_node_id_2;
                } else {
                    $new_node_id = count($state_labels);

                    $state_labels[] = $GLOBALS['__phpmorphy_substr']($word, $lcp);
                    $state_finals .= '1';
                    $state_dests[] = false;

                    if(false !== $state_dests[$node]) {
                        $state_dests[$node][] = $new_node_id;
                    } else {
                        $state_dests[$node] = array($new_node_id);
                    }

                    $node = $new_node_id;
                }
            }

            $prev_word = $word;
            $prev_word_len = $word_len;
            $prev_lcp = $lcp;
        }

        return array($state_labels, $state_finals, $state_dests);
    }
}
1995
- [Raise a SilverStripe Framework issue/bug](https://github.com/silverstripe/silverstripe-framework/issues/new)
- [Raise a SilverStripe CMS issue/bug](https://github.com/silverstripe/silverstripe-cms/issues/new)
- Please use the Silverstripe Forums to ask development-related questions.
-