1: <?php
  2:   3:   4:   5:   6:   7:   8:   9:  10:  11:  12:  13:  14:  15:  16:  17:  18:  19:  20:  21:  22:  23:  24:  25:  26:  27:  28:  29:  30:  31:  32:  33:  34: 
 35: 
 36:  37:  38:  39:  40:  41:  42:  43:  44:  45:  46:  47:  48:  49:  50:  51:  52:  53:  54:  55:  56:  57: 
 58: 
 59:  60:  61:  62:  63:  64:  65:  66:  67:  68:  69:  70:  71:  72:  73:  74:  75:  76:  77: 
 78: abstract class FineDiffOp {
 79:     abstract public function getFromLen();
 80:     abstract public function getToLen();
 81:     abstract public function getOpcode();
 82:     }
 83: 
 84: class FineDiffDeleteOp extends FineDiffOp {
 85:     public function __construct($len) {
 86:         $this->fromLen = $len;
 87:         }
 88:     public function getFromLen() {
 89:         return $this->fromLen;
 90:         }
 91:     public function getToLen() {
 92:         return 0;
 93:         }
 94:     public function getOpcode() {
 95:         if ( $this->fromLen === 1 ) {
 96:             return 'd';
 97:             }
 98:         return "d{$this->fromLen}";
 99:         }
100:     }
101: 
102: class FineDiffInsertOp extends FineDiffOp {
103:     public function __construct($text) {
104:         $this->text = $text;
105:         }
106:     public function getFromLen() {
107:         return 0;
108:         }
109:     public function getToLen() {
110:         return strlen($this->text);
111:         }
112:     public function getText() {
113:         return $this->text;
114:         }
115:     public function getOpcode() {
116:         $to_len = strlen($this->text);
117:         if ( $to_len === 1 ) {
118:             return "i:{$this->text}";
119:             }
120:         return "i{$to_len}:{$this->text}";
121:         }
122:     }
123: 
124: class FineDiffReplaceOp extends FineDiffOp {
125:     public function __construct($fromLen, $text) {
126:         $this->fromLen = $fromLen;
127:         $this->text = $text;
128:         }
129:     public function getFromLen() {
130:         return $this->fromLen;
131:         }
132:     public function getToLen() {
133:         return strlen($this->text);
134:         }
135:     public function getText() {
136:         return $this->text;
137:         }
138:     public function getOpcode() {
139:         if ( $this->fromLen === 1 ) {
140:             $del_opcode = 'd';
141:             }
142:         else {
143:             $del_opcode = "d{$this->fromLen}";
144:             }
145:         $to_len = strlen($this->text);
146:         if ( $to_len === 1 ) {
147:             return "{$del_opcode}i:{$this->text}";
148:             }
149:         return "{$del_opcode}i{$to_len}:{$this->text}";
150:         }
151:     }
152: 
153: class FineDiffCopyOp extends FineDiffOp {
154:     public function __construct($len) {
155:         $this->len = $len;
156:         }
157:     public function getFromLen() {
158:         return $this->len;
159:         }
160:     public function getToLen() {
161:         return $this->len;
162:         }
163:     public function getOpcode() {
164:         if ( $this->len === 1 ) {
165:             return 'c';
166:             }
167:         return "c{$this->len}";
168:         }
169:     public function increase($size) {
170:         return $this->len += $size;
171:         }
172:     }
173: 
174: 175: 176: 177: 178: 
179: class FineDiffOps {
180:     public function appendOpcode($opcode, $from, $from_offset, $from_len) {
181:         if ( $opcode === 'c' ) {
182:             $edits[] = new FineDiffCopyOp($from_len);
183:             }
184:         else if ( $opcode === 'd' ) {
185:             $edits[] = new FineDiffDeleteOp($from_len);
186:             }
187:         else  {
188:             $edits[] = new FineDiffInsertOp(substr($from, $from_offset, $from_len));
189:             }
190:         }
191:     public $edits = array();
192:     }
193: 
194: 195: 196: 197: 198: 199: 
200: class FineDiff {
201: 
202:     203: 204: 205: 206: 
207: 
208:     209: 210: 211: 212: 213: 214: 
215:     public function __construct($from_text = '', $to_text = '', $granularityStack = null) {
216:         
217:         $this->granularityStack = $granularityStack ? $granularityStack : FineDiff::$characterGranularity;
218:         $this->edits = array();
219:         $this->from_text = $from_text;
220:         $this->doDiff($from_text, $to_text);
221:         }
222: 
223:     public function getOps() {
224:         return $this->edits;
225:         }
226: 
227:     public function getOpcodes() {
228:         $opcodes = array();
229:         foreach ( $this->edits as $edit ) {
230:             $opcodes[] = $edit->getOpcode();
231:             }
232:         return implode('', $opcodes);
233:         }
234: 
235:     public function renderDiffToHTML() {
236:         $in_offset = 0;
237:         ob_start();
238:         foreach ( $this->edits as $edit ) {
239:             $n = $edit->getFromLen();
240:             if ( $edit instanceof FineDiffCopyOp ) {
241:                 FineDiff::renderDiffToHTMLFromOpcode('c', $this->from_text, $in_offset, $n);
242:                 }
243:             else if ( $edit instanceof FineDiffDeleteOp ) {
244:                 FineDiff::renderDiffToHTMLFromOpcode('d', $this->from_text, $in_offset, $n);
245:                 }
246:             else if ( $edit instanceof FineDiffInsertOp ) {
247:                 FineDiff::renderDiffToHTMLFromOpcode('i', $edit->getText(), 0, $edit->getToLen());
248:                 }
249:             else  {
250:                 FineDiff::renderDiffToHTMLFromOpcode('d', $this->from_text, $in_offset, $n);
251:                 FineDiff::renderDiffToHTMLFromOpcode('i', $edit->getText(), 0, $edit->getToLen());
252:                 }
253:             $in_offset += $n;
254:             }
255:         return ob_get_clean();
256:         }
257: 
258:     259: 260: 261: 
262:     public static function getDiffOpcodes($from, $to, $granularities = null) {
263:         $diff = new FineDiff($from, $to, $granularities);
264:         return $diff->getOpcodes();
265:         }
266: 
267:     268: 269: 
270:     public static function getDiffOpsFromOpcodes($opcodes) {
271:         $diffops = new FineDiffOps();
272:         FineDiff::renderFromOpcodes(null, $opcodes, array($diffops,'appendOpcode'));
273:         return $diffops->edits;
274:         }
275: 
276:     277: 278: 
279:     public static function renderToTextFromOpcodes($from, $opcodes) {
280:         ob_start();
281:         FineDiff::renderFromOpcodes($from, $opcodes, array('FineDiff','renderToTextFromOpcode'));
282:         return ob_get_clean();
283:         }
284: 
285:     286: 287: 
288:     public static function renderDiffToHTMLFromOpcodes($from, $opcodes) {
289:         ob_start();
290:         FineDiff::renderFromOpcodes($from, $opcodes, array('FineDiff','renderDiffToHTMLFromOpcode'));
291:         return ob_get_clean();
292:         }
293: 
294:     295: 296: 297: 
298:     public static function renderFromOpcodes($from, $opcodes, $callback) {
299:         if ( !is_callable($callback) ) {
300:             return;
301:             }
302:         $opcodes_len = strlen($opcodes);
303:         $from_offset = $opcodes_offset = 0;
304:         while ( $opcodes_offset <  $opcodes_len ) {
305:             $opcode = substr($opcodes, $opcodes_offset, 1);
306:             $opcodes_offset++;
307:             $n = intval(substr($opcodes, $opcodes_offset));
308:             if ( $n ) {
309:                 $opcodes_offset += strlen(strval($n));
310:                 }
311:             else {
312:                 $n = 1;
313:                 }
314:             if ( $opcode === 'c' ) { 
315:                 call_user_func($callback, 'c', $from, $from_offset, $n, '');
316:                 $from_offset += $n;
317:                 }
318:             else if ( $opcode === 'd' ) { 
319:                 call_user_func($callback, 'd', $from, $from_offset, $n, '');
320:                 $from_offset += $n;
321:                 }
322:             else  { 
323:                 call_user_func($callback, 'i', $opcodes, $opcodes_offset + 1, $n);
324:                 $opcodes_offset += 1 + $n;
325:                 }
326:             }
327:         }
328: 
329:     330: 331: 
332: 
333:     const paragraphDelimiters = "\n\r";
334:     public static $paragraphGranularity = array(
335:         FineDiff::paragraphDelimiters
336:         );
337:     const sentenceDelimiters = ".\n\r";
338:     public static $sentenceGranularity = array(
339:         FineDiff::paragraphDelimiters,
340:         FineDiff::sentenceDelimiters
341:         );
342:     const wordDelimiters = " \t.\n\r";
343:     public static $wordGranularity = array(
344:         FineDiff::paragraphDelimiters,
345:         FineDiff::sentenceDelimiters,
346:         FineDiff::wordDelimiters
347:         );
348:     const characterDelimiters = "";
349:     public static $characterGranularity = array(
350:         FineDiff::paragraphDelimiters,
351:         FineDiff::sentenceDelimiters,
352:         FineDiff::wordDelimiters,
353:         FineDiff::characterDelimiters
354:         );
355: 
356:     public static $textStack = array(
357:         ".",
358:         " \t.\n\r",
359:         ""
360:         );
361: 
362:     363: 364: 365: 366: 
367: 
368:     369: 370: 
371:     private function doDiff($from_text, $to_text) {
372:         $this->last_edit = false;
373:         $this->stackpointer = 0;
374:         $this->from_text = $from_text;
375:         $this->from_offset = 0;
376:         
377:         if ( empty($this->granularityStack) ) {
378:             return;
379:             }
380:         $this->_processGranularity($from_text, $to_text);
381:         }
382: 
383:     384: 385: 386: 387: 388: 389: 
390:     private function _processGranularity($from_segment, $to_segment) {
391:         $delimiters = $this->granularityStack[$this->stackpointer++];
392:         $has_next_stage = $this->stackpointer < count($this->granularityStack);
393:         foreach ( FineDiff::doFragmentDiff($from_segment, $to_segment, $delimiters) as $fragment_edit ) {
394:             
395:             if ( $fragment_edit instanceof fineDiffReplaceOp && $has_next_stage ) {
396:                 $this->_processGranularity(
397:                     substr($this->from_text, $this->from_offset, $fragment_edit->getFromLen()),
398:                     $fragment_edit->getText()
399:                     );
400:                 }
401:             
402:             else if ( $fragment_edit instanceof fineDiffCopyOp && $this->last_edit instanceof fineDiffCopyOp ) {
403:                 $this->edits[count($this->edits)-1]->increase($fragment_edit->getFromLen());
404:                 $this->from_offset += $fragment_edit->getFromLen();
405:                 }
406:             else {
407:                 
408:                 
409:                 
410:                 $this->edits[] = $this->last_edit = $fragment_edit;
411:                 $this->from_offset += $fragment_edit->getFromLen();
412:                 }
413:             }
414:         $this->stackpointer--;
415:         }
416: 
417:     418: 419: 420: 421: 422: 423: 
424:     private static function doFragmentDiff($from_text, $to_text, $delimiters) {
425:         
426:         
427:         
428:         if ( empty($delimiters) ) {
429:             return FineDiff::doCharDiff($from_text, $to_text);
430:             }
431: 
432:         $result = array();
433: 
434:         
435:         $from_text_len = strlen($from_text);
436:         $to_text_len = strlen($to_text);
437:         $from_fragments = FineDiff::extractFragments($from_text, $delimiters);
438:         $to_fragments = FineDiff::extractFragments($to_text, $delimiters);
439: 
440:         $jobs = array(array(0, $from_text_len, 0, $to_text_len));
441: 
442:         $cached_array_keys = array();
443: 
444:         while ( $job = array_pop($jobs) ) {
445: 
446:             
447:             list($from_segment_start, $from_segment_end, $to_segment_start, $to_segment_end) = $job;
448: 
449:             
450:             $from_segment_length = $from_segment_end - $from_segment_start;
451:             $to_segment_length = $to_segment_end - $to_segment_start;
452:             if ( !$from_segment_length || !$to_segment_length ) {
453:                 if ( $from_segment_length ) {
454:                     $result[$from_segment_start * 4] = new fineDiffDeleteOp($from_segment_length);
455:                     }
456:                 else if ( $to_segment_length ) {
457:                     $result[$from_segment_start * 4 + 1] = new fineDiffInsertOp(substr($to_text, $to_segment_start, $to_segment_length));
458:                     }
459:                 continue;
460:                 }
461: 
462:             
463:             $best_copy_length = 0;
464: 
465:             $from_base_fragment_index = $from_segment_start;
466: 
467:             $cached_array_keys_for_current_segment = array();
468: 
469:             while ( $from_base_fragment_index < $from_segment_end ) {
470:                 $from_base_fragment = $from_fragments[$from_base_fragment_index];
471:                 $from_base_fragment_length = strlen($from_base_fragment);
472:                 
473:                 if ( !isset($cached_array_keys_for_current_segment[$from_base_fragment]) ) {
474:                     if ( !isset($cached_array_keys[$from_base_fragment]) ) {
475:                         $to_all_fragment_indices = $cached_array_keys[$from_base_fragment] = array_keys($to_fragments, $from_base_fragment, true);
476:                         }
477:                     else {
478:                         $to_all_fragment_indices = $cached_array_keys[$from_base_fragment];
479:                         }
480:                     
481:                     if ( $to_segment_start > 0 || $to_segment_end < $to_text_len ) {
482:                         $to_fragment_indices = array();
483:                         foreach ( $to_all_fragment_indices as $to_fragment_index ) {
484:                             if ( $to_fragment_index < $to_segment_start ) { continue; }
485:                             if ( $to_fragment_index >= $to_segment_end ) { break; }
486:                             $to_fragment_indices[] = $to_fragment_index;
487:                             }
488:                         $cached_array_keys_for_current_segment[$from_base_fragment] = $to_fragment_indices;
489:                         }
490:                     else {
491:                         $to_fragment_indices = $to_all_fragment_indices;
492:                         }
493:                     }
494:                 else {
495:                     $to_fragment_indices = $cached_array_keys_for_current_segment[$from_base_fragment];
496:                     }
497:                 
498:                 foreach ( $to_fragment_indices as $to_base_fragment_index ) {
499:                     $fragment_index_offset = $from_base_fragment_length;
500:                     
501:                     for (;;) {
502:                         $fragment_from_index = $from_base_fragment_index + $fragment_index_offset;
503:                         if ( $fragment_from_index >= $from_segment_end ) {
504:                             break;
505:                             }
506:                         $fragment_to_index = $to_base_fragment_index + $fragment_index_offset;
507:                         if ( $fragment_to_index >= $to_segment_end ) {
508:                             break;
509:                             }
510:                         if ( $from_fragments[$fragment_from_index] !== $to_fragments[$fragment_to_index] ) {
511:                             break;
512:                             }
513:                         $fragment_length = strlen($from_fragments[$fragment_from_index]);
514:                         $fragment_index_offset += $fragment_length;
515:                         }
516:                     if ( $fragment_index_offset > $best_copy_length ) {
517:                         $best_copy_length = $fragment_index_offset;
518:                         $best_from_start = $from_base_fragment_index;
519:                         $best_to_start = $to_base_fragment_index;
520:                         }
521:                     }
522:                 $from_base_fragment_index += strlen($from_base_fragment);
523:                 
524:                 
525:                 if ( $best_copy_length >= $from_segment_length / 2) {
526:                     break;
527:                     }
528:                 
529:                 
530:                 if ( $from_base_fragment_index + $best_copy_length >= $from_segment_end ) {
531:                     break;
532:                     }
533:                 }
534: 
535:             if ( $best_copy_length ) {
536:                 $jobs[] = array($from_segment_start, $best_from_start, $to_segment_start, $best_to_start);
537:                 $result[$best_from_start * 4 + 2] = new fineDiffCopyOp($best_copy_length);
538:                 $jobs[] = array($best_from_start + $best_copy_length, $from_segment_end, $best_to_start + $best_copy_length, $to_segment_end);
539:                 }
540:             else {
541:                 $result[$from_segment_start * 4 ] = new fineDiffReplaceOp($from_segment_length, substr($to_text, $to_segment_start, $to_segment_length));
542:                 }
543:             }
544: 
545:         ksort($result, SORT_NUMERIC);
546:         return array_values($result);
547:         }
548: 
549:     550: 551: 552: 553: 554: 555: 556: 557: 558: 559: 560: 561: 562: 563: 564: 
565:     private static function doCharDiff($from_text, $to_text) {
566:         $result = array();
567:         $jobs = array(array(0, strlen($from_text), 0, strlen($to_text)));
568:         while ( $job = array_pop($jobs) ) {
569:             
570:             list($from_segment_start, $from_segment_end, $to_segment_start, $to_segment_end) = $job;
571:             $from_segment_len = $from_segment_end - $from_segment_start;
572:             $to_segment_len = $to_segment_end - $to_segment_start;
573: 
574:             
575:             if ( !$from_segment_len || !$to_segment_len ) {
576:                 if ( $from_segment_len ) {
577:                     $result[$from_segment_start * 4 + 0] = new fineDiffDeleteOp($from_segment_len);
578:                     }
579:                 else if ( $to_segment_len ) {
580:                     $result[$from_segment_start * 4 + 1] = new fineDiffInsertOp(substr($to_text, $to_segment_start, $to_segment_len));
581:                     }
582:                 continue;
583:                 }
584:             if ( $from_segment_len >= $to_segment_len ) {
585:                 $copy_len = $to_segment_len;
586:                 while ( $copy_len ) {
587:                     $to_copy_start = $to_segment_start;
588:                     $to_copy_start_max = $to_segment_end - $copy_len;
589:                     while ( $to_copy_start <= $to_copy_start_max ) {
590:                         $from_copy_start = strpos(substr($from_text, $from_segment_start, $from_segment_len), substr($to_text, $to_copy_start, $copy_len));
591:                         if ( $from_copy_start !== false ) {
592:                             $from_copy_start += $from_segment_start;
593:                             break 2;
594:                             }
595:                         $to_copy_start++;
596:                         }
597:                     $copy_len--;
598:                     }
599:                 }
600:             else {
601:                 $copy_len = $from_segment_len;
602:                 while ( $copy_len ) {
603:                     $from_copy_start = $from_segment_start;
604:                     $from_copy_start_max = $from_segment_end - $copy_len;
605:                     while ( $from_copy_start <= $from_copy_start_max ) {
606:                         $to_copy_start = strpos(substr($to_text, $to_segment_start, $to_segment_len), substr($from_text, $from_copy_start, $copy_len));
607:                         if ( $to_copy_start !== false ) {
608:                             $to_copy_start += $to_segment_start;
609:                             break 2;
610:                             }
611:                         $from_copy_start++;
612:                         }
613:                     $copy_len--;
614:                     }
615:                 }
616:             
617:             if ( $copy_len ) {
618:                 $jobs[] = array($from_segment_start, $from_copy_start, $to_segment_start, $to_copy_start);
619:                 $result[$from_copy_start * 4 + 2] = new FineDiffCopyOp($copy_len);
620:                 $jobs[] = array($from_copy_start + $copy_len, $from_segment_end, $to_copy_start + $copy_len, $to_segment_end);
621:                 }
622:             
623:             else {
624:                 $result[$from_segment_start * 4] = new FineDiffReplaceOp($from_segment_len, substr($to_text, $to_segment_start, $to_segment_len));
625:                 }
626:             }
627:         ksort($result, SORT_NUMERIC);
628:         return array_values($result);
629:         }
630: 
631:     632: 633: 634: 635: 636: 637: 638: 639: 640: 
641:     private static function ($text, $delimiters) {
642:         
643:         if ( empty($delimiters) ) {
644:             $chars = str_split($text, 1);
645:             $chars[strlen($text)] = '';
646:             return $chars;
647:             }
648:         $fragments = array();
649:         $start = $end = 0;
650:         for (;;) {
651:             $end += strcspn($text, $delimiters, $end);
652:             $end += strspn($text, $delimiters, $end);
653:             if ( $end === $start ) {
654:                 break;
655:                 }
656:             $fragments[$start] = substr($text, $start, $end - $start);
657:             $start = $end;
658:             }
659:         $fragments[$start] = '';
660:         return $fragments;
661:         }
662: 
663:     664: 665: 
666:     private static function renderToTextFromOpcode($opcode, $from, $from_offset, $from_len) {
667:         if ( $opcode === 'c' || $opcode === 'i' ) {
668:             echo substr($from, $from_offset, $from_len);
669:             }
670:         }
671: 
672:     private static function renderDiffToHTMLFromOpcode($opcode, $from, $from_offset, $from_len) {
673:         if ( $opcode === 'c' ) {
674:             echo htmlentities(htmlentities(substr($from, $from_offset, $from_len)));
675:             }
676:         else if ( $opcode === 'd' ) {
677:             $deletion = substr($from, $from_offset, $from_len);
678:             if ( strcspn($deletion, " \n\r") === 0 ) {
679:                 $deletion = str_replace(array("\n","\r"), array('\n','\r'), $deletion);
680:                 }
681:             echo '<del>', htmlentities(htmlentities($deletion)), '</del>';
682:             }
683:         else  {
684:             echo '<ins>', htmlentities(htmlentities(substr($from, $from_offset, $from_len))), '</ins>';
685:             }
686:         }
687:     }
688: 
689: