1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
| <?PHP
if(isset($_GET['showsource']))
die(highlight_file(__FILE__, true));
//No automatic slash-or-quotes-behaviour I don't know about
if (get_magic_quotes_gpc()) {
function stripslashes_deep($value)
{
$value = is_array($value) ?
array_map('stripslashes_deep', $value) :
stripslashes($value);
return $value;
}
$_REQUEST = array_map('stripslashes_deep', $_REQUEST);
$_POST = array_map('stripslashes_deep', $_POST);
$_GET = array_map('stripslashes_deep', $_GET);
$_COOKIE = array_map('stripslashes_deep', $_COOKIE);
}
function levenshteinPlus($s, $t, $wantArray = 0, $debug = 0)
{
if(is_array($s))
{
$n = count($s);
$m = count($t);
}
else
{
$n = strlen($s);
$m = strlen($t);
}
$matrix = array();
// Initialize first row with 0..n
for($i = 0; $i <= $n; $i++)
$d[$i][0] = $i;
// Intialize first column with 0..m
for($j = 0; $j <= $m; $j++)
$d[0][$j] = $j;
// Cost calculation loop
$maxSubCost = 0;
for($i = 1; $i <= $n; $i++)
{
$sc = $s[($i - 1)];
for($j = 1; $j <= $m; $j++)
{
if(preg_replace('/[[:punct:]]/', '', $sc) == preg_replace('/[[:punct:]]/', '', $t[($j - 1)])) // Characters are equal, no cost
$subCost = 0;
else // Characters are not equal, costs 1
$subCost = 1;
$left = $d[($i - 1)][$j] + 1; // left + deletion cost
$above = $d[$i][($j - 1)] + 1; // above + insertion cose
$diagonal = $d[($i - 1)][($j - 1)] + $subCost; // diagonal + substitution cost
$d[$i][$j] = min($above, min($left, $diagonal)); // Put minimum in current cell
if($d[$i][$j] > $maxSubCost)
$maxSubCost = $d[$i][$j];
}
}
$i = $n;
$j = $m;
// Find the optimal path backwards:
if($wantArray)
{
$res = array();
$stappen = 0;
while(!($i == 0 && $j == 0))
{
$stappen++;
$left = ($i == 0)? $maxSubCost : $d[($i - 1)][$j];
$above = ($j == 0)? $maxSubCost : $d[$i][($j - 1)];
$diagonal = ($i == 0 || $j == 0)? $maxSubCost : $d[($i - 1)][($j - 1)];
$minimal = min($above, min($left, $diagonal));
// Choose minimal of left, above and diagonal:
if($diagonal == $minimal)
{
$sc = $s[($i - 1)];
$tc = $t[($j - 1)];
if(preg_replace('/[[:punct:]]/', '', $sc) == preg_replace('/[[:punct:]]/', '', $tc))
{
if($debug) echo "Leave '".$tc."' the same<br>";
$res[] = array('0', $tc);
}
else
{
if($debug) echo "Substitute '".$sc."' with '".$tc."'<br>";
$res[] = array('1', $sc, $tc);
}
$i--;
$j--;
}
else if($above == $minimal)
{
if($debug) echo "Insert '".$t[($j - 1)]."'<br>";
$res[] = array('+', $t[($j - 1)]);
$j--;
}
else // left==minimal
{
if($debug) echo "Delete '".$s[($i - 1)]."'<br>";
$res[] = array('-', $s[($i - 1)]);
$i--;
}
}
$res[] = array($d[$n][$m], $stappen);
$res = array_reverse($res);
return $res;
}
return $d[$n][$m];
}
$str1 = 'The price of crude oil fell nearly $3 and settled below $62 a barrel Monday on a supply glut at a key North American delivery point as geopolitical tensions loomed.
An oversupply at the storage hub in Cushing, Okla., while localized, had a disproportionate effect on prices, said Citigroup Global Markets energy analyst Tim Evans. eDesign.nl is a cool website.
Because the hub serves as a delivery point for wood, the facility is the "Achilles heel" for the petroleum market, he said. As it nears capacity, dealers have no place to store more oil should they want to take advantage of low prices on the May futures contract, which expires April 20.';
$str2 = 'The price of crude oil fell nearly $3 and settled below $62 a barrel Monday on a supply glut at a key North American delivery point as geopolitical tensions loomed.
An oversupply at the storage hub in Cushing, Okla., while localized, had a disproportionate effect on prices, said Citigroup Global Markets energy analyst Tim Evans.
Because the hub serves as a delivery point for oil, the facility is the "Achilles heel" for the petroleum market, he said. As it nears capacity, dealers have no place to store more oil should they want to take advantage of low prices on the May futures contract, which expires April 20, Evans said.';
if(isset($_POST['source']))
$str1 = $_POST['source'];
if(isset($_POST['target']))
$str2 = $_POST['target'];
$res = levenshteinPlus(explode(" ", $str1), explode(" ", $str2), 1);
?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<title>Levenshtein extended</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
</head>
<body style="font-family: Lucida Console; font-size: 10pt;">
<h1>Levenshtein extended</h1>
<form action="<?PHP echo $_SERVER['PHP_SELF']; ?>" method="post">
<table border="0">
<tr>
<td>Source text</td>
<td>Target text</td>
</tr>
<tr>
<td><textarea name="source" style="border: 1px solid black; width:500px; height:250px;" cols="62" rows="15"><?php echo htmlentities($str1); ?></textarea></td>
<td><textarea name="target" style="border: 1px solid black; width:500px; height:250px;" cols="62" rows="15"><?php echo htmlentities($str2); ?></textarea></td>
</tr>
<tr>
<td><a href="http://www.edesign.nl/archives/20">Levenshtein showcase</a> by <a href="http://www.edesign.nl">eDesign.nl</a> (<a href="<?php echo $_SERVER['PHP_SELF']; ?>?showsource=true">source code</a>)</td>
<td style="text-align: right"><input type="submit" value="Compare texts" /></td>
</tr>
</table>
</form>
<h2>Results:</h2>
<?PHP
echo '<p>Source text has '.count(explode(' ', $str1)).' words and the target text has '.count(explode(' ', $str2)).'. The transformation needed '.round(($res[0][0]/$res[0][1])*100).'% of the source text to be modified.<br />';
?>
<br />
Compared text:</p>
<div style="border: 1px solid black; width:1009px;"><p>
<?php
$out = '';
for($i = 1; $i < count($res); $i++)
{
if($res[$i][0] == '0')
$out .= $res[$i][1]." ";
else if($res[$i][0] == '1')
$out .= "<span style='color: blue;'>".$res[$i][2]." </span>";
else if($res[$i][0] == '+')
$out .= "<span style='color: blue;'>".$res[$i][1]." </span>";
else if($res[$i][0] == '-')
$out .= "<span style='color: blue;'>".$res[$i][1]." </span>";
}
echo str_replace("\n", '<br />', $out);
?></p></div>
<p>
<a href="http://validator.w3.org/check?uri=referer"><img
src="http://www.w3.org/Icons/valid-xhtml10"
alt="Valid XHTML 1.0 Strict" height="31" width="88" style="border: 0px;" /></a>
</p>
</body>
</html> |