Вопрос:

Translate Cyrillic to Latin with SQL Server

sql sql-server

635 просмотра

3 ответа

1292 Репутация автора

How to convert Cyrillic to Latin in T-SQL function?

Update. As I have not found anything ready on the web, I have asked the question and I have enclosed the answer to my question to share the code I have made. I marked the tick for knowledge sharing while asking question.

Автор: Przemyslaw Remin Источник Размещён: 04.01.2018 03:56

Ответы (3)


0 плюса

1292 Репутация автора

Вот решение, переводящее кириллицу на латиницу с REPLACEфункцией.

create function dbo.Cyrillic2Latin (@string nvarchar(max)) 
returns nvarchar(max) as
begin

set @string = replace (@string, N'ый'       ,N'y')
set @string = replace (@string, N'ЫЙ'       ,N'Y')
set @string = replace (@string, N'а'        ,N'a')
set @string = replace (@string, N'б'        ,N'b')
set @string = replace (@string, N'в'        ,N'v')
set @string = replace (@string, N'г'        ,N'g')
set @string = replace (@string, N'д'        ,N'd')
set @string = replace (@string, N'е'        ,N'e')
set @string = replace (@string, N'ё'        ,N'yo')
set @string = replace (@string, N'ж'        ,N'zh')
set @string = replace (@string, N'з'        ,N'z')
set @string = replace (@string, N'и'        ,N'i')
set @string = replace (@string, N'й'        ,N'y')
set @string = replace (@string, N'к'        ,N'k')
set @string = replace (@string, N'л'        ,N'l')
set @string = replace (@string, N'м'        ,N'm')
set @string = replace (@string, N'н'        ,N'n')
set @string = replace (@string, N'о'        ,N'o')
set @string = replace (@string, N'п'        ,N'p')
set @string = replace (@string, N'р'        ,N'r')
set @string = replace (@string, N'с'        ,N's')
set @string = replace (@string, N'т'        ,N't')
set @string = replace (@string, N'у'        ,N'u')
set @string = replace (@string, N'ф'        ,N'f')
set @string = replace (@string, N'х'        ,N'kh')
set @string = replace (@string, N'ц'        ,N'c')
set @string = replace (@string, N'ч'        ,N'ch')
set @string = replace (@string, N'ш'        ,N'sh')
set @string = replace (@string, N'щ'        ,N'shch')
set @string = replace (@string, N'ъ'        ,N' ')
set @string = replace (@string, N'ы'        ,N'y')
set @string = replace (@string, N'ь'        ,N'')
set @string = replace (@string, N'э'        ,N'e')
set @string = replace (@string, N'ю'        ,N'yu')
set @string = replace (@string, N'я'        ,N'ya')
set @string = replace (@string, N'А'        ,N'A')
set @string = replace (@string, N'Б'        ,N'B')
set @string = replace (@string, N'В'        ,N'V')
set @string = replace (@string, N'Г'        ,N'G')
set @string = replace (@string, N'Д'        ,N'D')
set @string = replace (@string, N'Е'        ,N'E')
set @string = replace (@string, N'Ё'        ,N'YO')
set @string = replace (@string, N'Ж'        ,N'ZH')
set @string = replace (@string, N'З'        ,N'Z')
set @string = replace (@string, N'И'        ,N'I')
set @string = replace (@string, N'Й'        ,N'Y')
set @string = replace (@string, N'К'        ,N'K')
set @string = replace (@string, N'Л'        ,N'L')
set @string = replace (@string, N'М'        ,N'M')
set @string = replace (@string, N'Н'        ,N'N')
set @string = replace (@string, N'О'        ,N'O')
set @string = replace (@string, N'П'        ,N'P')
set @string = replace (@string, N'Р'        ,N'R')
set @string = replace (@string, N'С'        ,N'S')
set @string = replace (@string, N'Т'        ,N'T')
set @string = replace (@string, N'У'        ,N'U')
set @string = replace (@string, N'Ф'        ,N'F')
set @string = replace (@string, N'Х'        ,N'KH')
set @string = replace (@string, N'Ц'        ,N'C')
set @string = replace (@string, N'Ч'        ,N'CH')
set @string = replace (@string, N'Ш'        ,N'SH')
set @string = replace (@string, N'Щ'        ,N'SHCH')
set @string = replace (@string, N'Ъ'        ,N'')
set @string = replace (@string, N'Ы'        ,N'Y')
set @string = replace (@string, N'Ь'        ,N'')
set @string = replace (@string, N'Э'        ,N'E')
set @string = replace (@string, N'Ю'        ,N'YU')
set @string = replace (@string, N'Я'        ,N'YA')

return @String
end
Автор: Przemyslaw Remin Размещён: 04.01.2018 03:56

3 плюса

28569 Репутация автора

Решение

Using Prezemyslaw Remin's initial answer as a point of reference for the replacements, here is a much better TVF version:

CREATE FUNCTION dbo.Cyrillic2Latin_tv (@String nvarchar(MAX))
RETURNS TABLE
AS RETURN(
    SELECT REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(@string,N'ый',N'y'),N'ЫЙ',N'Y'),N'а',N'a'),N'б',N'b'),N'в',N'v'),N'г',N'g'),N'д',N'd'),N'е',N'e'),N'ё',N'yo'),N'ж',N'zh'),N'з',N'z'),N'и',N'i'),N'й',N'y'),N'к',N'k'),N'л',N'l'),N'м',N'm'),N'н',N'n'),N'о',N'o'),N'п',N'p'),N'р',N'r'),N'с',N's'),N'т',N't'),N'у',N'u'),N'ф',N'f'),N'х',N'kh'),N'ц',N'c'),N'ч',N'ch'),N'ш',N'sh'),N'щ',N'shch'),N'ъ',N''),N'ы',N'y'),N'ь',N''),N'э',N'e'),N'ю',N'yu'),N'я',N'ya'),N'А',N'A'),N'Б',N'B'),N'В',N'V'),N'Г',N'G'),N'Д',N'D'),N'Е',N'E'),N'Ё',N'YO'),N'Ж',N'ZH'),N'З',N'Z'),N'И',N'I'),N'Й',N'Y'),N'К',N'K'),N'Л',N'L'),N'М',N'M'),N'Н',N'N'),N'О',N'O'),N'П',N'P'),N'Р',N'R'),N'С',N'S'),N'Т',N'T'),N'У',N'U'),N'Ф',N'F'),N'Х',N'KH'),N'Ц',N'C'),N'Ч',N'CH'),N'Ш',N'SH'),N'Щ',N'SHCH'),N'Ъ',N''),N'Ы',N'Y'),N'Ь',N''),N'Э',N'E'),N'Ю',N'YU'),N'Я',N'YA') AS NonCyrillic
);

(Yes, I realise that is ugly to look at)

As for a comparison, did a "quick" test of our production server at work (_s suffix function is the scaler and _tv is the Table-Value Function). The SQL:

SET STATISTICS IO ON;
SET STATISTICS TIME ON;

PRINT 'Using Scalar';
SELECT dbo.Cyrillic2Latin_s(BCM.[Name]) AS ReplacedName
FROM OpenGI..ic_yyclient BCM
WHERE BCM.[Name] IS NOT NULL;

PRINT 'Using TVF';
SELECT C2L.NonCyrillic AS ReplacedName
FROM OpenGI..ic_yyclient BCM
     CROSS APPLY dbo.Cyrillic2Latin_tv(BCM.[Name]) C2L
WHERE BCM.[Name] IS NOT NULL;


SET STATISTICS IO OFF;
SET STATISTICS TIME OFF;

And the Statistics:

Using Scalar

 SQL Server Execution Times:
   CPU time = 0 ms,  elapsed time = 0 ms.

(147548 rows affected)
Table 'icp_yyclient'. Scan count 1, logical reads 1449, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

 SQL Server Execution Times:
   CPU time = 78672 ms,  elapsed time = 84518 ms.
Using TVF

 SQL Server Execution Times:
   CPU time = 0 ms,  elapsed time = 0 ms.

(147548 rows affected)
Table 'icp_yyclient'. Scan count 1, logical reads 1449, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

 SQL Server Execution Times:
   CPU time = 36140 ms,  elapsed time = 36696 ms.

 SQL Server Execution Times:
   CPU time = 0 ms,  elapsed time = 0 ms.

Как вы можете видеть, хотя сканирование и логическое чтение были одинаковыми, время обработки для 147 548 строк было значительно меньше с использованием TVF (более чем на 48 секунд). Таким образом, TVF является гораздо лучшим решением этой проблемы.

Автор: Larnu Размещён: 04.01.2018 04:25

0 плюса

608 Репутация автора

    /* PREPARE TABLES WITH BOTH LETTERS CYRILIC AND LATIN */

CREATE TABLE dbo.Cyrilic
(
sn INT,
Letter NVARCHAR(2)
)
GO

INSERT INTO Cyrilic
(sn, Letter)
VALUES
(1,N'а'),
(2,N'б'),
(3,N'в'),
(4,N'г'),
(5,N'д'),
(6,N'ђ'),
(7,N'е'),
(8,N'ж'),
(9,N'з'),
(10,N'и'),
(11,N'ј'),
(12,N'к'),
(13,N'л'),
(14,N'љ'),
(15,N'м'),
(16,N'н'),
(17,N'њ'),
(18,N'о'),
(19,N'п'),
(20,N'р'),
(21,N'с'),
(22,N'т'),
(23,N'ћ'),
(24,N'у'),
(25,N'ф'),
(26,N'х'),
(27,N'ц'),
(28,N'ч'),
(29,N'џ'),
(30,N'ш')

CREATE TABLE dbo.Latin
(
sn INT,
Letter NVARCHAR(2)
)
GO

INSERT INTO Latin
(sn, Letter)
VALUES
(1,N'a'),
(2,N'b'),
(3,N'v'),
(4,N'g'),
(5,N'd'),
(6,N'dj'),
(7,N'е'),
(8,N'ž'),
(9,N'z'),
(10,N'i'),
(11,N'j'),
(12,N'k'),
(13,N'l'),
(14,N'lj'),
(15,N'm'),
(16,N'n'),
(17,N'nj'),
(18,N'o'),
(19,N'p'),
(20,N'r'),
(21,N's'),
(22,N't'),
(23,N'ć'),
(24,N'u'),
(25,N'f'),
(26,N'h'),
(27,N'c'),
(28,N'č'),
(29,N'dž'),
(30,N'š')
GO

/* CREATE FUNCTION TO CONVERT STRING WITH PARAMETER TO PASS */

CREATE FUNCTION ConvertToLatin (@string NVARCHAR(MAX))  
RETURNS NVARCHAR(MAX) AS  
BEGIN  
    DECLARE @i int  
    DECLARE @character NVARCHAR(1)
    DECLARE @characterForCheck NVARCHAR(1)
    DECLARE @dest NVARCHAR(1000)  
    DECLARE @characterExists BIT
    SET @dest = ''

    SET @i = 1;  
    DECLARE @src NVARCHAR(MAX) = (SELECT N''+@string)
    WHILE (@i <= LEN(@src))  
    BEGIN  
    SET @character = SUBSTRING(@src, @i, 1)  
    SET @characterForCheck = LOWER(@character)
    DECLARE @isLowoer BIT 
    SET @isLowoer = (SELECT CASE WHEN N'' + UNICODE(@character) LIKE N'' + UNICODE(LOWER(@character))  THEN 1 ELSE 0 END)

    SET @characterExists = (SELECT COUNT(*) FROM Cyrilic c WHERE c.Letter LIKE N''+@characterForCheck)
    SET @dest = (SELECT TOP 1 @dest +
                CASE WHEN @characterExists = 0 
                     THEN N''+@character 
                     ELSE 
                        CASE 
                            WHEN @isLowoer = 1 
                            THEN l.Letter 
                            --if there is two letters in one (lj,nj,dz, then just first is upper
                            ELSE UPPER(LEFT(l.Letter,1))+LOWER(SUBSTRING(l.Letter,2,LEN(l.Letter)))
                    END
                END 
    FROM Cyrilic c
    INNER JOIN Latin l ON l.sn = c.sn
    WHERE c.Letter LIKE N''+@characterForCheck OR @characterExists = 0)

    SET @i = @i + 1  
END
    RETURN @dest  
END  

/* CALL FUNCTION AND PASS SOME PARAMETER (IT COULD BE SELECT LIKE IN EXAMPLE */
SELECT dbo.ConvertToLatin((SELECT N'Тест'))
Автор: Dado Размещён: 15.05.2019 02:29
Вопросы из категории :
32x32