1 <?php
2
3 4 5 6 7
8
9
10
11
12
13
14
15
16
17
18
19
20 21
22 define('USE_ASSERTS', function_exists('assert'));
23
24 25 26 27 28
29 class _DiffOp {
30 var $type;
31 var $orig;
32 var $final;
33
34 function reverse() {
35 trigger_error("pure virtual", E_USER_ERROR);
36 }
37
38 function norig() {
39 return $this->orig ? sizeof($this->orig) : 0;
40 }
41
42 function nfinal() {
43 return $this->final ? sizeof($this->final) : 0;
44 }
45 }
46
47 48 49 50 51
52 class _DiffOp_Copy extends _DiffOp {
53 var $type = 'copy';
54
55 function _DiffOp_Copy ($orig, $final = false) {
56 if (!is_array($final))
57 $final = $orig;
58 $this->orig = $orig;
59 $this->final = $final;
60 }
61
62 function reverse() {
63 return new _DiffOp_Copy($this->final, $this->orig);
64 }
65 }
66
67 68 69 70 71
72 class _DiffOp_Delete extends _DiffOp {
73 var $type = 'delete';
74
75 function _DiffOp_Delete ($lines) {
76 $this->orig = $lines;
77 $this->final = false;
78 }
79
80 function reverse() {
81 return new _DiffOp_Add($this->orig);
82 }
83 }
84
85 86 87 88 89
90 class _DiffOp_Add extends _DiffOp {
91 var $type = 'add';
92
93 function _DiffOp_Add ($lines) {
94 $this->final = $lines;
95 $this->orig = false;
96 }
97
98 function reverse() {
99 return new _DiffOp_Delete($this->final);
100 }
101 }
102
103 104 105 106 107
108 class _DiffOp_Change extends _DiffOp {
109 var $type = 'change';
110
111 function _DiffOp_Change ($orig, $final) {
112 $this->orig = $orig;
113 $this->final = $final;
114 }
115
116 function reverse() {
117 return new _DiffOp_Change($this->final, $this->orig);
118 }
119 }
120
121
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
144 class _DiffEngine
145 {
146 function diff ($from_lines, $to_lines) {
147 $n_from = sizeof($from_lines);
148 $n_to = sizeof($to_lines);
149
150 $this->xchanged = $this->ychanged = array();
151 $this->xv = $this->yv = array();
152 $this->xind = $this->yind = array();
153 unset($this->seq);
154 unset($this->in_seq);
155 unset($this->lcs);
156
157
158 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
159 if ($from_lines[$skip] != $to_lines[$skip])
160 break;
161 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
162 }
163
164 $xi = $n_from; $yi = $n_to;
165 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
166 if ($from_lines[$xi] != $to_lines[$yi])
167 break;
168 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
169 }
170
171
172 for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
173 $xhash[$from_lines[$xi]] = 1;
174 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
175 $line = $to_lines[$yi];
176 if ( ($this->ychanged[$yi] = empty($xhash[$line])) )
177 continue;
178 $yhash[$line] = 1;
179 $this->yv[] = $line;
180 $this->yind[] = $yi;
181 }
182 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
183 $line = $from_lines[$xi];
184 if ( ($this->xchanged[$xi] = empty($yhash[$line])) )
185 continue;
186 $this->xv[] = $line;
187 $this->xind[] = $xi;
188 }
189
190
191 $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
192
193
194 $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
195 $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
196
197
198 $edits = array();
199 $xi = $yi = 0;
200 while ($xi < $n_from || $yi < $n_to) {
201 USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
202 USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
203
204
205 $copy = array();
206 while ( $xi < $n_from && $yi < $n_to
207 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
208 $copy[] = $from_lines[$xi++];
209 ++$yi;
210 }
211 if ($copy)
212 $edits[] = new _DiffOp_Copy($copy);
213
214
215 $delete = array();
216 while ($xi < $n_from && $this->xchanged[$xi])
217 $delete[] = $from_lines[$xi++];
218
219 $add = array();
220 while ($yi < $n_to && $this->ychanged[$yi])
221 $add[] = $to_lines[$yi++];
222
223 if ($delete && $add)
224 $edits[] = new _DiffOp_Change($delete, $add);
225 elseif ($delete)
226 $edits[] = new _DiffOp_Delete($delete);
227 elseif ($add)
228 $edits[] = new _DiffOp_Add($add);
229 }
230 return $edits;
231 }
232
233
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249
250 function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
251 $flip = false;
252
253 if ($xlim - $xoff > $ylim - $yoff) {
254
255
256 $flip = true;
257 list ($xoff, $xlim, $yoff, $ylim)
258 = array( $yoff, $ylim, $xoff, $xlim);
259 }
260
261 if ($flip)
262 for ($i = $ylim - 1; $i >= $yoff; $i--)
263 $ymatches[$this->xv[$i]][] = $i;
264 else
265 for ($i = $ylim - 1; $i >= $yoff; $i--)
266 $ymatches[$this->yv[$i]][] = $i;
267
268 $this->lcs = 0;
269 $this->seq[0]= $yoff - 1;
270 $this->in_seq = array();
271 $ymids[0] = array();
272
273 $numer = $xlim - $xoff + $nchunks - 1;
274 $x = $xoff;
275 for ($chunk = 0; $chunk < $nchunks; $chunk++) {
276 if ($chunk > 0)
277 for ($i = 0; $i <= $this->lcs; $i++)
278 $ymids[$i][$chunk-1] = $this->seq[$i];
279
280 $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
281 for ( ; $x < $x1; $x++) {
282 $line = $flip ? $this->yv[$x] : $this->xv[$x];
283 if (empty($ymatches[$line]))
284 continue;
285 $matches = $ymatches[$line];
286 reset($matches);
287 while (list ($junk, $y) = each($matches))
288 if (empty($this->in_seq[$y])) {
289 $k = $this->_lcs_pos($y);
290 USE_ASSERTS && assert($k > 0);
291 $ymids[$k] = $ymids[$k-1];
292 break;
293 }
294 while (list ($junk, $y) = each($matches)) {
295 if ($y > $this->seq[$k-1]) {
296 USE_ASSERTS && assert($y < $this->seq[$k]);
297
298
299 $this->in_seq[$this->seq[$k]] = false;
300 $this->seq[$k] = $y;
301 $this->in_seq[$y] = 1;
302 }
303 else if (empty($this->in_seq[$y])) {
304 $k = $this->_lcs_pos($y);
305 USE_ASSERTS && assert($k > 0);
306 $ymids[$k] = $ymids[$k-1];
307 }
308 }
309 }
310 }
311
312 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
313 $ymid = $ymids[$this->lcs];
314 for ($n = 0; $n < $nchunks - 1; $n++) {
315 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
316 $y1 = $ymid[$n] + 1;
317 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
318 }
319 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
320
321 return array($this->lcs, $seps);
322 }
323
324 function _lcs_pos ($ypos) {
325 $end = $this->lcs;
326 if ($end == 0 || $ypos > $this->seq[$end]) {
327 $this->seq[++$this->lcs] = $ypos;
328 $this->in_seq[$ypos] = 1;
329 return $this->lcs;
330 }
331
332 $beg = 1;
333 while ($beg < $end) {
334 $mid = (int)(($beg + $end) / 2);
335 if ( $ypos > $this->seq[$mid] )
336 $beg = $mid + 1;
337 else
338 $end = $mid;
339 }
340
341 USE_ASSERTS && assert($ypos != $this->seq[$end]);
342
343 $this->in_seq[$this->seq[$end]] = false;
344 $this->seq[$end] = $ypos;
345 $this->in_seq[$ypos] = 1;
346 return $end;
347 }
348
349 350 351 352 353 354 355 356 357 358 359
360 function _compareseq ($xoff, $xlim, $yoff, $ylim) {
361
362 while ($xoff < $xlim && $yoff < $ylim
363 && $this->xv[$xoff] == $this->yv[$yoff]) {
364 ++$xoff;
365 ++$yoff;
366 }
367
368
369 while ($xlim > $xoff && $ylim > $yoff
370 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
371 --$xlim;
372 --$ylim;
373 }
374
375 if ($xoff == $xlim || $yoff == $ylim)
376 $lcs = 0;
377 else {
378
379
380
381 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
382 list ($lcs, $seps)
383 = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
384 }
385
386 if ($lcs == 0) {
387
388
389 while ($yoff < $ylim)
390 $this->ychanged[$this->yind[$yoff++]] = 1;
391 while ($xoff < $xlim)
392 $this->xchanged[$this->xind[$xoff++]] = 1;
393 }
394 else {
395
396 reset($seps);
397 $pt1 = $seps[0];
398 while ($pt2 = next($seps)) {
399 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
400 $pt1 = $pt2;
401 }
402 }
403 }
404
405 406 407 408 409 410 411 412 413 414 415 416
417 function _shift_boundaries ($lines, &$changed, $other_changed) {
418 $i = 0;
419 $j = 0;
420
421 USE_ASSERTS && assert('sizeof($lines) == sizeof($changed)');
422 $len = sizeof($lines);
423 $other_len = sizeof($other_changed);
424
425 while (1) {
426 427 428 429 430 431 432 433 434 435 436
437 while ($j < $other_len && $other_changed[$j])
438 $j++;
439
440 while ($i < $len && ! $changed[$i]) {
441 USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
442 $i++; $j++;
443 while ($j < $other_len && $other_changed[$j])
444 $j++;
445 }
446
447 if ($i == $len)
448 break;
449
450 $start = $i;
451
452
453 while (++$i < $len && $changed[$i])
454 continue;
455
456 do {
457 458 459 460
461 $runlength = $i - $start;
462
463 464 465 466 467
468 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
469 $changed[--$start] = 1;
470 $changed[--$i] = false;
471 while ($start > 0 && $changed[$start - 1])
472 $start--;
473 USE_ASSERTS && assert('$j > 0');
474 while ($other_changed[--$j])
475 continue;
476 USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
477 }
478
479 480 481 482 483
484 $corresponding = $j < $other_len ? $i : $len;
485
486 487 488 489 490 491 492
493 while ($i < $len && $lines[$start] == $lines[$i]) {
494 $changed[$start++] = false;
495 $changed[$i++] = 1;
496 while ($i < $len && $changed[$i])
497 $i++;
498
499 USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
500 $j++;
501 if ($j < $other_len && $other_changed[$j]) {
502 $corresponding = $i;
503 while ($j < $other_len && $other_changed[$j])
504 $j++;
505 }
506 }
507 } while ($runlength != $i - $start);
508
509 510 511 512
513 while ($corresponding < $i) {
514 $changed[--$start] = 1;
515 $changed[--$i] = 0;
516 USE_ASSERTS && assert('$j > 0');
517 while ($other_changed[--$j])
518 continue;
519 USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
520 }
521 }
522 }
523 }
524
525 526 527 528 529
530 class Diff
531 {
532 public static $html_cleaner_class = null;
533
534 var $edits;
535
536 537 538 539 540 541 542 543
544 function Diff($from_lines, $to_lines) {
545 $eng = new _DiffEngine;
546 $this->edits = $eng->diff($from_lines, $to_lines);
547
548 }
549
550 551 552 553 554 555 556 557 558 559
560 function reverse () {
561 $rev = $this;
562 $rev->edits = array();
563 foreach ($this->edits as $edit) {
564 $rev->edits[] = $edit->reverse();
565 }
566 return $rev;
567 }
568
569 570 571 572 573
574 function isEmpty () {
575 foreach ($this->edits as $edit) {
576 if ($edit->type != 'copy')
577 return false;
578 }
579 return true;
580 }
581
582 583 584 585 586 587 588
589 function lcs () {
590 $lcs = 0;
591 foreach ($this->edits as $edit) {
592 if ($edit->type == 'copy')
593 $lcs += sizeof($edit->orig);
594 }
595 return $lcs;
596 }
597
598 599 600 601 602 603 604 605
606 function orig() {
607 $lines = array();
608
609 foreach ($this->edits as $edit) {
610 if ($edit->orig)
611 array_splice($lines, sizeof($lines), 0, $edit->orig);
612 }
613 return $lines;
614 }
615
616 617 618 619 620 621 622 623
624 function finaltext() {
625 $lines = array();
626
627 foreach ($this->edits as $edit) {
628 if ($edit->final)
629 array_splice($lines, sizeof($lines), 0, $edit->final);
630 }
631 return $lines;
632 }
633
634 635 636 637 638
639 function _check ($from_lines, $to_lines) {
640 if (serialize($from_lines) != serialize($this->orig()))
641 trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
642 if (serialize($to_lines) != serialize($this->finaltext()))
643 trigger_error("Reconstructed final doesn't match", E_USER_ERROR);
644
645 $rev = $this->reverse();
646 if (serialize($to_lines) != serialize($rev->orig()))
647 trigger_error("Reversed original doesn't match", E_USER_ERROR);
648 if (serialize($from_lines) != serialize($rev->finaltext()))
649 trigger_error("Reversed final doesn't match", E_USER_ERROR);
650
651
652 $prevtype = 'none';
653 foreach ($this->edits as $edit) {
654 if ( $prevtype == $edit->type )
655 trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
656 $prevtype = $edit->type;
657 }
658
659 $lcs = $this->lcs();
660 trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
661 }
662
663
664
665 666 667 668 669 670 671 672 673 674 675
676 static function cleanHTML($content, $cleaner=null) {
677 if (!$cleaner) {
678 if (class_exists(self::$html_cleaner_class)) {
679 $cleaner = new self::$html_cleaner_class;
680 }
681 }
682 if ($cleaner) {
683 $content = $cleaner->cleanHTML($content);
684 } else {
685
686 $doc = new SS_HTMLValue($content);
687 $content = $doc->getContent();
688 }
689 return $content;
690 }
691
692 static function compareHTML($from, $to) {
693
694 $set1 = self::getHTMLChunks($from);
695 $set2 = self::getHTMLChunks($to);
696
697
698 $diff = new Diff($set1, $set2);
699
700 $tagStack[1] = $tagStack[2] = 0;
701 $rechunked[1] = $rechunked[2] = array();
702
703
704
705 foreach($diff->edits as $edit) {
706
707
708
709 switch($edit->type) {
710 case 'copy':
711 $lookForTag = false;
712 $stuffFor[1] = $edit->orig;
713 $stuffFor[2] = $edit->orig;
714 break;
715
716 case 'change':
717 $lookForTag = true;
718 $stuffFor[1] = $edit->orig;
719 $stuffFor[2] = $edit->final;
720 break;
721
722 case 'add':
723 $lookForTag = true;
724 $stuffFor[1] = null;
725 $stuffFor[2] = $edit->final;
726 break;
727
728 case 'delete':
729 $lookForTag = true;
730 $stuffFor[1] = $edit->orig;
731 $stuffFor[2] = null;
732 break;
733 }
734
735 foreach($stuffFor as $listName => $chunks) {
736 if($chunks) {
737 foreach($chunks as $item) {
738
739 if($tagStack[$listName]) $rechunked[$listName][sizeof($rechunked[$listName])-1] .= ' ' . $item;
740 else $rechunked[$listName][] = $item;
741
742 if($lookForTag && isset($item[0]) && $item[0] == "<" && substr($item,0,2) != "</") {
743 $tagStack[$listName] = 1;
744 } else if($tagStack[$listName]) {
745 if(substr($item,0,2) == "</") $tagStack[$listName]--;
746 else if(isset($item[0]) && $item[0] == "<") $tagStack[$listName]++;
747 }
748
749
750 }
751 }
752 }
753 }
754
755
756 $diff = new Diff($rechunked[1], $rechunked[2]);
757 $content = '';
758 foreach($diff->edits as $edit) {
759
760
761
762 switch($edit->type) {
763 case 'copy':
764 $content .= " " . implode(" ", $edit->orig) . " ";
765 break;
766
767 case 'change':
768 $content .= " <ins>" . implode(" ", $edit->final) . "</ins> ";
769 $content .= " <del>" . implode(" ", $edit->orig) . "</del> ";
770 break;
771
772 case 'add':
773 $content .= " <ins>" . implode(" ", $edit->final) . "</ins> ";
774 break;
775
776 case 'delete':
777 $content .= " <del>" . implode(" ", $edit->orig) . "</del> ";
778 break;
779 }
780 }
781
782 return self::cleanHTML($content);
783 }
784 static function getHTMLChunks($content) {
785 $content = str_replace(array(" ","<", ">"),array(" "," <", "> "),$content);
786 $candidateChunks = preg_split("![\t\r\n ]+!", $content);
787 while(list($i,$item) = each($candidateChunks)) {
788 if(isset($item[0]) && $item[0] == "<") {
789 $newChunk = $item;
790 while($item[strlen($item)-1] != ">") {
791 list($i,$item) = each($candidateChunks);
792 $newChunk .= ' ' . $item;
793 }
794 $chunks[] = $newChunk;
795 } else {
796 $chunks[] = $item;
797 }
798 }
799 return $chunks;
800 }
801
802 }
803
804
805
806
807 808 809 810 811
812 class MappedDiff
813 extends Diff
814 {
815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837
838 function MappedDiff($from_lines, $to_lines,
839 $mapped_from_lines, $mapped_to_lines) {
840
841 assert(sizeof($from_lines) == sizeof($mapped_from_lines));
842 assert(sizeof($to_lines) == sizeof($mapped_to_lines));
843
844 $this->Diff($mapped_from_lines, $mapped_to_lines);
845
846 $xi = $yi = 0;
847
848
849 for ($i = 0, $max = sizeof($this->edits); $i < $max; $i++) {
850 $orig = &$this->edits[$i]->orig;
851 if (is_array($orig)) {
852 $orig = array_slice($from_lines, $xi, sizeof($orig));
853 $xi += sizeof($orig);
854 }
855
856 $final = &$this->edits[$i]->final;
857 if (is_array($final)) {
858 $final = array_slice($to_lines, $yi, sizeof($final));
859 $yi += sizeof($final);
860 }
861 }
862 }
863 }
864
865 ?>
866
[Raise a SilverStripe Framework issue/bug](https://github.com/silverstripe/silverstripe-framework/issues/new)
- [Raise a SilverStripe CMS issue/bug](https://github.com/silverstripe/silverstripe-cms/issues/new)
- Please use the
Silverstripe Forums to ask development related questions.
-