2017-11-15 27 views
2
Need help with extracting the firstname, middlename and lastname from a 
freetext fullname. How to extract them out with all these formats? 
Need to figure out how to handle format 2,5,9,7 

--fullname sample data
-- One free-text name per row; the trailing number is the "format" id used in
-- the discussion of this script.
DECLARE @name TABLE
(fullname VARCHAR(100));
INSERT INTO @name (fullname) VALUES
('Malone,Susan M'),    --1
('Conn,Chris G'),      --2
('Van Pess,Wen B'),    --3
('DESHPANDE, ANN W.'), --4
('Asif,LEE'),          --5
('CERVANTES MANDY'),   --6
('Bill, Dave'),        --7
('SMITH,ANN M'),       --8
('BHULLER, MATT'),     --9
('KIM (DAUM), GAIL'),  --10
('John.Mills');        --11

-- Delimiters are tried in priority order when looking for the boundary between
-- the surname and the given names. Each one must be a single character, because
-- the query skips exactly one character at the split position.
DECLARE @DELIMITER1 varchar(5), @DELIMITER2 varchar(5), @DELIMITER3 varchar(5), @MAX_LENGTH int;
SET @DELIMITER1 = ',';   -- preferred: "Last,First" / "Last, First Middle"
SET @DELIMITER2 = ' ';   -- fallback:  "Last First" (no comma present)
SET @DELIMITER3 = '.';   -- last resort: "Last.First" (no comma and no space)
SET @MAX_LENGTH = 100;   -- matches the width of @name.fullname so nothing is truncated

-- Splits every fullname into Lastname / Middlefullname / Firstname.
--   1. src    : the name with leading/trailing blanks removed.
--   2. cut    : position of the surname/given-name boundary (first comma, else
--               first space, else first dot, else one past the end).
--   3. part   : text before the boundary (surname) and after it (given names).
--   4. family : surname without a trailing parenthetical such as "(DAUM)".
--   5. gap    : distance of the last space from the end of the given names;
--               0 means there is a single given name and therefore no middle name.
-- The middle name is the last word of the given names, so a two-word first
-- name followed by a middle name ("Mary Ann M") keeps "Mary Ann" together.
-- NOTE(review): format 11 ('John.Mills') is treated as Last.First like every
-- other format here; confirm that this is the intended reading.
-- NOTE(review): a name with no delimiter at all ends up in Lastname; confirm.
SELECT
    n.fullname,
    NULLIF(family.txt, '') AS Lastname,
    CASE
        WHEN gap.pos > 0 THEN RIGHT(part.given, gap.pos - 1)
        ELSE NULL
    END AS Middlefullname,
    -- Hyphens are stripped from the first name, as the original query did.
    NULLIF(REPLACE(
        CASE
            WHEN gap.pos > 0 THEN RTRIM(LEFT(part.given, LEN(part.given) - gap.pos))
            ELSE part.given
        END, '-', ''), '') AS Firstname
FROM @name AS n
CROSS APPLY (VALUES (LTRIM(RTRIM(n.fullname)))) AS src(txt)
CROSS APPLY (VALUES (COALESCE(
    NULLIF(CHARINDEX(@DELIMITER1, src.txt), 0),
    NULLIF(CHARINDEX(@DELIMITER2, src.txt), 0),
    NULLIF(CHARINDEX(@DELIMITER3, src.txt), 0),
    LEN(src.txt) + 1))) AS cut(pos)
CROSS APPLY (VALUES (
    RTRIM(LEFT(src.txt, cut.pos - 1)),
    LTRIM(RTRIM(SUBSTRING(src.txt, cut.pos + 1, @MAX_LENGTH))))) AS part(surname, given)
CROSS APPLY (VALUES (
    CASE
        -- "> 1" keeps a surname that starts with "(" from being emptied.
        WHEN CHARINDEX('(', part.surname) > 1
            THEN RTRIM(LEFT(part.surname, CHARINDEX('(', part.surname) - 1))
        ELSE part.surname
    END)) AS family(txt)
CROSS APPLY (VALUES (CHARINDEX(@DELIMITER2, REVERSE(part.given)))) AS gap(pos)
ORDER BY n.fullname;
+0

이 데이터를 이런 형태로 데이터베이스에 저장하려는 것입니까? '언패킹'이 필요한 데이터는 '원자적' 데이터가 아닙니다. 두 규칙 중 하나라도 위반하면 제1정규형(1NF)을 위반하게 됩니다. – Zorkolot

답변

0

"이름 부분"을 점으로 연결하는 첫 번째 논리. 내 의견에 유의하십시오.

--fullname sample data
-- nameid is an identity column and is filled in automatically; only fullname
-- is supplied.
DECLARE @name TABLE
(nameid int identity, fullname VARCHAR(100));
INSERT INTO @name (fullname) VALUES
('Malone,Susan M'),    --1
('Conn,Chris G'),      --2
('Van Pess,Wen B'),    --3
('DESHPANDE, ANN W.'), --4
('Asif,LEE'),          --5
('CERVANTES MANDY'),   --6
('Bill, Dave'),        --7
('SMITH,ANN M'),       --8
('BHULLER, MATT'),     --9
('KIM (DAUM), GAIL'),  --10
('John.Mills');        --11

-- clean: normalises every fullname into dot-separated name parts ("prepped")
-- and counts the separators ("spaces") so the outer query knows how many
-- parts there are.
with clean as
(
    select original = fullname, prepped = dotted.fn, total.spaces
    from @name
    cross apply (values (patindex('%(%),%',fullname),fullname)) prep1(x,fn) -- check for parentheses:
    cross apply (values (-- remove parentheses if they exist, then turn commas and dots into spaces:
    replace(replace(
    case
     -- x is the position of "("; everything from there through the next comma is dropped
     when prep1.x > 1 then substring(fn,1,x-1) + substring(fn,charindex(',',fn,x)+1,8000)
     else fn
    end, ',', ' '), '.', ' '))) prep(fn)
    -- collapse doubled spaces to single ones and trim; without this, ", " leaves two
    -- consecutive spaces, which inflates the space count and puts empty parts in prepped
    -- (two passes handle runs of up to four spaces)
    cross apply (values (replace(rtrim(ltrim(replace(prep.fn,'  ',' '))),'  ',' '))) clean(fn)
    cross apply (values (len(clean.fn)-len(replace(clean.fn,' ','')))) total(spaces) -- count spaces
    cross apply (values (replace(clean.fn, ' ','.'))) dotted(fn) -- replace spaces with dots
)
-- parsename numbers the parts from the right (1 = last part), so the parts are
-- re-ordered here to put the given names first. Names with 0 or more than 3
-- spaces match no branch and return NULL.
-- NOTE: for a 4-part name the two surname words come out reversed
-- ('Van Pess,Wen B' -> 'Wen B Pess Van').
select original, cleaned =
    case spaces
    when 1 then parsename(prepped,1)+' '+parsename(prepped,2)
    when 2 then parsename(prepped,2)+' '+parsename(prepped,1)+' '+parsename(prepped,3)
    when 3 then parsename(prepped,2)+' '+parsename(prepped,1)+' '+parsename(prepped,3)+
      ' '+parsename(prepped,4)
    end
from clean;

반환 :

original    cleaned 
-------------------- ------------------ 
Malone,Susan M  Susan M Malone 
Conn,Chris G   Chris G Conn 
Van Pess,Wen B  Wen B Pess Van 
DESHPANDE, ANN W. ANN W DESHPANDE 
Asif,LEE    LEE Asif 
CERVANTES MANDY  MANDY CERVANTES 
Bill, Dave   Dave Bill 
SMITH,ANN M   ANN M SMITH 
BHULLER, MATT  MATT BHULLER 
KIM (DAUM), GAIL  GAIL KIM 
John.Mills   Mills John 
012,351

--fullname sample data
-- nameid is an identity column and is filled in automatically; only fullname
-- is supplied.
DECLARE @name TABLE
(nameid int identity, fullname VARCHAR(100));
INSERT INTO @name (fullname) VALUES
('Malone,Susan M'),    --1
('Conn,Chris G'),      --2
('Van Pess,Wen B'),    --3
('DESHPANDE, ANN W.'), --4
('Asif,LEE'),          --5
('CERVANTES MANDY'),   --6
('Bill, Dave'),        --7
('SMITH,ANN M'),       --8
('BHULLER, MATT'),     --9
('KIM (DAUM), GAIL'),  --10
('John.Mills');        --11


-- Preparation step only: returns each original name, its dot-separated form
-- ("prepped") and the number of separators ("spaces").
select original = fullname, prepped = dotted.fn, total.spaces
from @name
cross apply (values (patindex('%(%),%',fullname),fullname)) prep1(x,fn) -- check for parentheses:
cross apply (values (-- remove parentheses if they exist, then turn commas and dots into spaces:
    replace(replace(
    case
    -- x is the position of "("; everything from there through the next comma is dropped
    when prep1.x > 1 then substring(fn,1,x-1) + substring(fn,charindex(',',fn,x)+1,8000)
    else fn
    end, ',', ' '), '.', ' '))) prep(fn)
-- collapse doubled spaces to single ones and trim; without this, ", " leaves two
-- consecutive spaces, which inflates the space count and puts empty parts in prepped
-- (two passes handle runs of up to four spaces)
cross apply (values (replace(rtrim(ltrim(replace(prep.fn,'  ',' '))),'  ',' '))) clean(fn)
cross apply (values (len(clean.fn)-len(replace(clean.fn,' ','')))) total(spaces) -- count spaces
cross apply (values (replace(clean.fn, ' ','.'))) dotted(fn); -- replace spaces with dots

original     prepped    spaces 
------------------------ -------------------- ------- 
Malone,Susan M   Malone.Susan.M  2 
Conn,Chris G    Conn.Chris.G   2 
Van Pess,Wen B   Van.Pess.Wen.B  3 
DESHPANDE, ANN W.  DESHPANDE.ANN.W  2 
Asif,LEE     Asif.LEE    1 
CERVANTES MANDY   CERVANTES.MANDY  1 
Bill, Dave    Bill.Dave   1 
SMITH,ANN M    SMITH.ANN.M   2 
BHULLER, MATT   BHULLER.MATT   1 
KIM (DAUM), GAIL   KIM.GAIL    1 
John.Mills    John.Mills   1 

위 쿼리는 위와 같은 결과를 반환합니다. 나머지는 앞서 보인 것처럼 parsename을 사용하여 수행할 수 있습니다.

또는 regex CLR(mdq.regexreplace)을 사용할 수도 있습니다. 몇 년 전 제가 SSC에 올린 게시물을 참고하십시오.

+0

안녕하세요 Alan, 도와주셔서 감사합니다! parsename을 사용할 때 데이터에 점이 4개 이상 있으면 어떻게 해야 합니까? – Jason312

+0

부분이 3개를 넘으면 제가 게시한 내용은 작동하지 않습니다. 오늘 중이나 오늘 저녁에 3개가 넘는 부분을 처리하는 방법을 게시하겠습니다. –