UFN_ADDRESS_STANDARDIZE

Return

Return Type
nvarchar(150)

Parameters

Parameter Parameter Type Mode Description
@INPUTADDRESS nvarchar(150) IN
@COUNTRYID uniqueidentifier IN

Definition

Copy


CREATE function dbo.UFN_ADDRESS_STANDARDIZE(
 @INPUTADDRESS nvarchar(150),
 @COUNTRYID uniqueidentifier
)
returns nvarchar(150)
as
begin
    -- when countryid doesn't have a value try setting it to the default value

    if @COUNTRYID is null
      select @COUNTRYID = DEFAULTCOUNTRYID
      from dbo.INTERNATIONALIZATIONINFO;

    declare @SPLITSTRINGS table(TOKEN nvarchar(150), TRANSLATION nvarchar(40), ID int, WORDNO tinyint, HASSPACE bit); -- holds single and multi word tokens from the original input string

    declare @INDEX smallint = 1
    declare @STRINGTOKEN nvarchar(40) = '';   -- single word token container

    declare @IDCOUNTER smallint = 0;          -- used to generate the id of each token row

    declare @ISSPECIALCHAR bit = 0;           -- flag that indicates a special character (- , ;)


    -- remove periods and leading and trailing whitespace

    set @INPUTADDRESS = replace(ltrim(rtrim(@INPUTADDRESS)),'.','');

    -- loop through the input string and populate the token table with single word tokens

    -- in case of special characters that have a space in front, after or around set the @ISSPECIAL flag

    declare @LENGTH int = len(@INPUTADDRESS);
    declare @SUBINPUTADDRESS nvarchar(1);
    declare @ALLTOKENS nvarchar(400) = '';

    while @INDEX <= @LENGTH
    begin
      set @SUBINPUTADDRESS = SUBSTRING(@INPUTADDRESS, @INDEX,1)
      if @SUBINPUTADDRESS = ' '
      begin
        if @ISSPECIALCHAR = 1
        begin
          set @STRINGTOKEN = @STRINGTOKEN + @SUBINPUTADDRESS
          set @ISSPECIALCHAR = 0
        end

        if @STRINGTOKEN <> ''
        begin
          insert into @SPLITSTRINGS (TOKEN, WORDNO,ID, HASSPACE) values (@STRINGTOKEN, 1, power(2,@IDCOUNTER), 1);
          set @ALLTOKENS = @ALLTOKENS + @STRINGTOKEN + ' '
          set @STRINGTOKEN = ''
          set @IDCOUNTER = @IDCOUNTER+1
        end
      end

      else if @SUBINPUTADDRESS in('-',',',';',char(13),char(10))
      begin
        if @STRINGTOKEN <> ''
        begin
          insert into @SPLITSTRINGS (TOKEN, WORDNO,ID, HASSPACE) values (@STRINGTOKEN, 1, power(2,@IDCOUNTER), 0);
          set @ALLTOKENS = @ALLTOKENS + @STRINGTOKEN
          set @STRINGTOKEN = ''
          set @IDCOUNTER = @IDCOUNTER+1
        end

        set @STRINGTOKEN = @STRINGTOKEN + @SUBINPUTADDRESS
        set @ISSPECIALCHAR = 1
      end

      else
      begin
        if @ISSPECIALCHAR = 1
        begin
          insert into @SPLITSTRINGS (TOKEN, WORDNO,ID, HASSPACE) values (@STRINGTOKEN, 1, power(2,@IDCOUNTER), 0);
          set @ALLTOKENS = @ALLTOKENS + @STRINGTOKEN
          set @STRINGTOKEN = ''
          set @IDCOUNTER = @IDCOUNTER+1
          set @ISSPECIALCHAR = 0
        end

        set @STRINGTOKEN = @STRINGTOKEN + @SUBINPUTADDRESS
      end

      set @INDEX = @INDEX +1
    end

    -- add last single word token to the table

    if @STRINGTOKEN <> '' 
    begin
      insert into @SPLITSTRINGS (TOKEN, WORDNO,ID, HASSPACE) values (@STRINGTOKEN, 1, power(2,@IDCOUNTER), 0);
      set @ALLTOKENS = @ALLTOKENS + @STRINGTOKEN;
    end

    -- insert multi word tokens resulted by combining existing tokens; using binary representation and to determine the combinations

    set @INDEX =1
    declare @TOKENCOUNT tinyint = (select count(TOKEN) from @SPLITSTRINGS where TOKEN not in('-',',',';',char(13),char(10)))
    declare @COMBINEDTOKENS nvarchar(100) = '11' -- used to determine the combination of one word tokens


    while @INDEX < @TOKENCOUNT 
    begin
      insert into @SPLITSTRINGS(TOKEN, WORDNO, ID, HASSPACE)
      select S1.TOKEN + ' ' + S2.TOKEN, @INDEX + 1, S1.ID + S2.ID, S2.HASSPACE 
      from @SPLITSTRINGS S1, @SPLITSTRINGS S2 
      where S1.WORDNO = 1 and S2.WORDNO = @INDEX and S1.ID < S2.ID
      and charindex(S1.TOKEN + ' ' + S2.TOKEN, @ALLTOKENS) > 0
      and S1.TOKEN not in('-',',',';',char(13),char(10)) and S2.TOKEN not in('-',',',';',char(13),char(10));

      set @COMBINEDTOKENS = @COMBINEDTOKENS + '1'
      set @INDEX = @INDEX + 1
    end

    -- apply translations on all tokens in the table excluding special character tokens

    update @SPLITSTRINGS
    set TRANSLATION = A.TRANSLATION
    from @SPLITSTRINGS S
    inner join dbo.ADDRESSTOKEN A on A.TOKEN = ' '+S.TOKEN+case when right(S.TOKEN,1)<>' ' then ' ' else '' end and S.TOKEN not in('-',',',';',char(13),char(10)) and A.COUNTRYID=@COUNTRYID 

    -- after translations where added, delete the token rows which are part of a multi token that had a translation using bitwise '&'

    -- if a combination of tokens triggered a translation use that instead of the translation of the composing one word tokens

    delete from @SPLITSTRINGS
    where ID in(select S1.ID
                from @SPLITSTRINGS S1, @SPLITSTRINGS S2
                where (S1.ID & S2.ID) > 0
                and S1.WORDNO < S2.WORDNO
                and S2.TRANSLATION is not null)
    or (WORDNO > 1 and TRANSLATION is null)

    -- rebuild the input string ordering by the id of the tokens

    select @INPUTADDRESS = dbo.UDA_BUILDLISTWITHDELIMITER(LTRIM(RTRIM(ISNULL(S.TRANSLATION,S.TOKEN))) + case when HASSPACE = 1 then ' ' else '' end,'')
    from (select top(100) * from @SPLITSTRINGS ORDER BY ID) S

    return @INPUTADDRESS
end