UFN_ADDRESS_STANDARDIZE
Return
Return Type |
---|
nvarchar(150) |
Parameters
Parameter | Parameter Type | Mode | Description |
---|---|---|---|
@INPUTADDRESS | nvarchar(150) | IN | |
@COUNTRYID | uniqueidentifier | IN |
Definition
Copy
CREATE function dbo.UFN_ADDRESS_STANDARDIZE(
@INPUTADDRESS nvarchar(150),
@COUNTRYID uniqueidentifier
)
returns nvarchar(150)
as
begin
-- when countryid doesn't have a value try setting it to the default value
if @COUNTRYID is null
select @COUNTRYID = DEFAULTCOUNTRYID
from dbo.INTERNATIONALIZATIONINFO;
declare @SPLITSTRINGS table(TOKEN nvarchar(150), TRANSLATION nvarchar(40), ID int, WORDNO tinyint, HASSPACE bit); -- holds single and multi word tokens from the original input string
declare @INDEX smallint = 1;
declare @STRINGTOKEN nvarchar(40) = ''; -- single word token container
declare @IDCOUNTER smallint = 0; -- used to generate the id of each token row
declare @ISSPECIALCHAR bit = 0; -- flag that indicates a special character (- , ;)
-- remove periods and leading and trailing whitespace
set @INPUTADDRESS = replace(ltrim(rtrim(@INPUTADDRESS)),'.','');
-- loop through the input string and populate the token table with single word tokens
-- in case of special characters that have a space in front, after or around set the @ISSPECIAL flag
declare @LENGTH int = len(@INPUTADDRESS);
declare @SUBINPUTADDRESS nvarchar(1);
declare @ALLTOKENS nvarchar(400) = '';
while @INDEX <= @LENGTH
begin
set @SUBINPUTADDRESS = SUBSTRING(@INPUTADDRESS, @INDEX,1)
if @SUBINPUTADDRESS = ' '
begin
if @ISSPECIALCHAR = 1
begin
set @STRINGTOKEN = @STRINGTOKEN + @SUBINPUTADDRESS
set @ISSPECIALCHAR = 0
end
if @STRINGTOKEN <> ''
begin
insert into @SPLITSTRINGS (TOKEN, WORDNO,ID, HASSPACE) values (@STRINGTOKEN, 1, power(2,@IDCOUNTER), 1);
set @ALLTOKENS = @ALLTOKENS + @STRINGTOKEN + ' '
set @STRINGTOKEN = ''
set @IDCOUNTER = @IDCOUNTER+1
end
end
else if @SUBINPUTADDRESS in('-',',',';',char(13),char(10))
begin
if @STRINGTOKEN <> ''
begin
insert into @SPLITSTRINGS (TOKEN, WORDNO,ID, HASSPACE) values (@STRINGTOKEN, 1, power(2,@IDCOUNTER), 0);
set @ALLTOKENS = @ALLTOKENS + @STRINGTOKEN
set @STRINGTOKEN = ''
set @IDCOUNTER = @IDCOUNTER+1
end
set @STRINGTOKEN = @STRINGTOKEN + @SUBINPUTADDRESS
set @ISSPECIALCHAR = 1
end
else
begin
if @ISSPECIALCHAR = 1
begin
insert into @SPLITSTRINGS (TOKEN, WORDNO,ID, HASSPACE) values (@STRINGTOKEN, 1, power(2,@IDCOUNTER), 0);
set @ALLTOKENS = @ALLTOKENS + @STRINGTOKEN
set @STRINGTOKEN = ''
set @IDCOUNTER = @IDCOUNTER+1
set @ISSPECIALCHAR = 0
end
set @STRINGTOKEN = @STRINGTOKEN + @SUBINPUTADDRESS
end
set @INDEX = @INDEX +1
end
-- add last single word token to the table
if @STRINGTOKEN <> ''
begin
insert into @SPLITSTRINGS (TOKEN, WORDNO,ID, HASSPACE) values (@STRINGTOKEN, 1, power(2,@IDCOUNTER), 0);
set @ALLTOKENS = @ALLTOKENS + @STRINGTOKEN;
end
-- insert multi word tokens resulted by combining existing tokens; using binary representation and to determine the combinations
set @INDEX =1
declare @TOKENCOUNT tinyint = (select count(TOKEN) from @SPLITSTRINGS where TOKEN not in('-',',',';',char(13),char(10)))
declare @COMBINEDTOKENS nvarchar(100) = '11' -- used to determine the combination of one word tokens
while @INDEX < @TOKENCOUNT
begin
insert into @SPLITSTRINGS(TOKEN, WORDNO, ID, HASSPACE)
select S1.TOKEN + ' ' + S2.TOKEN, @INDEX + 1, S1.ID + S2.ID, S2.HASSPACE
from @SPLITSTRINGS S1, @SPLITSTRINGS S2
where S1.WORDNO = 1 and S2.WORDNO = @INDEX and S1.ID < S2.ID
and charindex(S1.TOKEN + ' ' + S2.TOKEN, @ALLTOKENS) > 0
and S1.TOKEN not in('-',',',';',char(13),char(10)) and S2.TOKEN not in('-',',',';',char(13),char(10));
set @COMBINEDTOKENS = @COMBINEDTOKENS + '1'
set @INDEX = @INDEX + 1
end
-- apply translations on all tokens in the table excluding special character tokens
update @SPLITSTRINGS
set TRANSLATION = A.TRANSLATION
from @SPLITSTRINGS S
inner join dbo.ADDRESSTOKEN A on A.TOKEN = ' '+S.TOKEN+case when right(S.TOKEN,1)<>' ' then ' ' else '' end and S.TOKEN not in('-',',',';',char(13),char(10)) and A.COUNTRYID=@COUNTRYID
-- after translations where added, delete the token rows which are part of a multi token that had a translation using bitwise '&'
-- if a combination of tokens triggered a translation use that instead of the translation of the composing one word tokens
delete from @SPLITSTRINGS
where ID in(select S1.ID
from @SPLITSTRINGS S1, @SPLITSTRINGS S2
where (S1.ID & S2.ID) > 0
and S1.WORDNO < S2.WORDNO
and S2.TRANSLATION is not null)
or (WORDNO > 1 and TRANSLATION is null)
-- rebuild the input string ordering by the id of the tokens
select @INPUTADDRESS = dbo.UDA_BUILDLISTWITHDELIMITER(LTRIM(RTRIM(ISNULL(S.TRANSLATION,S.TOKEN))) + case when HASSPACE = 1 then ' ' else '' end,'')
from (select top(100) * from @SPLITSTRINGS ORDER BY ID) S
return @INPUTADDRESS
end