Thursday, April 07, 2011

Convert CSV data into a SQL table


Recently I have been working with some report data that is stored statically in a database in CSV format. I had a need to have that CSV data displayed in a tabular format. I searched around and couldn't find anything written to do this in SQL, so I decided to write a stored procedure to do this very thing. It will take a CSV and parse it out into a SQL table. For instance if I have report data that looks like this...

"FirstName","LastName","DOB","Children"
"John","Smith","1/5/1980","Bob,Sally,Joe,Chris"
"Jane","Smith","2/25/1982","Bob,Sally,Joe,Chris"
"Bruce","Wayne","5/13/1975",""
"Peter","Parker","5/23/1970",""
"","","",""

It would be transformed into this ...

FirstNameLastNameDOBChildren
John Smith 1/5/1980 Bob,Sally,Joe,Chris
Jane Smith 2/25/1982 Bob,Sally,Joe,Chris
Bruce Wayne 5/13/1975
Peter Parker 5/23/1970

Note that in the last column the commas are preserved because they are within the quotation mark text qualifiers. Below is the stored procedure that does this. Feel free to use it, and leave a comment if you found it useful.

-- ==========================================================================================
-- Author:  Davin Studer
-- Create date: 4/5/2011
-- Description: This will take a CSV input
-- and transform it into a table
--
-- Params:
-- @string          - The CSV string
-- @textQualifier   - Character that denotes a string within the CSV
-- @columnDelimiter - Character that denotes different columns ... doesn't have to be a comma
-- @rowOneIsHeader  - Does the first row contain column names?
-- ==========================================================================================
create procedure [dbo].[CSVToTable]
    @string nvarchar(max) = '',
    @textQualifier varchar(1) = '',
    @columnDelimiter varchar(1) = ',',
    @rowOneIsHeader bit = 0
as
begin
    -- set nocount on added to prevent extra result sets from
    -- interfering with select statements.
    set nocount on;

    -- We need and input string
    if isnull(@string, '') = ''
    begin
        raiserror ('Please supply an CSV string', 15, 1)
        return
    end
 
    -- We need and column delimiter
    if isnull(@columnDelimiter, '') = ''
    begin
        raiserror ('Please supply a column delimiter.', 15, 1)
        return
    end
  
    -- Make sue the user doesn't pass null as a value
    if isnull(@textQualifier, '') = ''
    begin
        set @textQualifier = ''
    end

    -- Make sue the user doesn't pass null as a value
    if isnull(@rowOneIsHeader, '') = ''
    begin
        set @rowOneIsHeader = 0
    end

    declare
        @columns int = 1,
        @columnNames nvarchar(max) = '',
        @stop bit = 0,
        @position int = 0,
        @temp nvarchar(1) = '',
        @dataStart int = 0,
        @sql nvarchar(max) = '',
        @qualifierToggle bit = 0,
        @tempString nvarchar(max) = '',
        @delimiterReplacementUTFNumber int = 2603

    -- Get rid of the ##tempCSVSplitToTable table if it exists
    if object_id('tempdb..##tempCSVSplitToTable') is not null
    begin
        drop table ##tempCSVSplitToTable
    end

    -- Get rid of white space
    set @string = rtrim(ltrim(@string))

    -- Set the EOL to char(13)
    set @string = replace(@string, char(13) + char(10), char(13))
    set @string = replace(@string, char(10), char(13))

    -- Deal with the delimiter character within the text qualifier characters
    if @textQualifier <> ''
    begin
        while @position <> len(@string)
        begin
            set @temp = substring(@string, @position, 1)
            if @temp = @textQualifier
            begin
                if @qualifierToggle = 0
                begin
                    set @qualifierToggle = 1
                end
                else
                begin
                    set @qualifierToggle = 0
                end
            end
            if @temp = @columnDelimiter and @qualifierToggle = 1
            begin
                set @tempString = @tempString + nchar(@delimiterReplacementUTFNumber) -- replace with UTF delimiter replacement character
            end
            else
            begin
                set @tempString = @tempString + @temp
            end
            set @position = @position + 1
        end

        set @string = @tempString
    end

    -- Get rid of text qualifier ... we don't need it now
    if @textQualifier <> ''
    begin
        set @string = replace(@string, @textQualifier, '')
    end

    -- Get column names
    set @position = 1
    while @stop = 0
    begin
        set @temp = substring(@string, @position, 1)
        if @temp = @columnDelimiter
        begin
            set @columns = @columns + 1
            set @columnNames = @columnNames + ','
        end
        else if @temp = char(13)
        begin
            set @stop = 1
        end
        else
        begin
            set @columnNames = @columnNames + @temp
        end
        set @position = @position + 1
    end

    set @dataStart = @position

    if @rowOneIsHeader = 0
    begin
        set @dataStart = 1
        set @position = 1
        set @columnNames = ''
        while @position - 1 < @columns
        begin
            set @columnNames = @columnNames + ',Column' + cast(@position as varchar(1))
            set @position = @position + 1
        end
        set @columnNames = substring(@columnNames, 2, len(@columnNames))
    end

    -- Build ##tempCSVSplitToTable table
    set @sql = @sql + 'create table ##tempCSVSplitToTable (' + char(13) + '['
    set @stop = 0
    set @position = 1
    while @stop = 0
    begin
        set @temp = substring(@columnNames, @position, 1)
        if @temp <> ','
        begin
            set @sql = @sql + @temp
        end
        else
        begin
            set @sql = @sql + '] nvarchar(max),' + char(13) + '['
        end
   
        set @position = @position + 1

        if @position - 1 = len(@columnNames)
        begin
            set @sql = @sql + '] nvarchar(max)'
            set @stop = 1
        end
    end
    set @sql = @sql + ')' + char(13)
    exec (@sql)

    -- insert values into ##tempCSVSplitToTable table
    set @position = @dataStart
    while @position - 1 < len(@string)
    begin
        set @stop = 0
        set @sql = 'insert into ##tempCSVSplitToTable ([' + replace(@columnNames, ',', '],[') + ']) values ('''
        while @stop = 0
        begin
            set @temp = substring(@string, @position, 1)

            -- end of column
            if @temp = @columnDelimiter
            begin
                set @sql = @sql + ''','''
            end
            -- EOL
            else if @temp = char(13) or datalength(@temp) = 0
            begin
                set @stop = 1
            end
            -- deal with apostrophe in data
            else if @temp = ''''
            begin
                set @sql = @sql + ''''''
            end
            -- column data that isn't an apostrophe
            else
            begin
                set @sql = @sql + @temp
            end

            set @position = @position + 1
        end 
        set @sql = @sql + ''')'

        -- Get rid of any UTF delimiter replacements that were put in to take the place of the delimiter character within the text qualifier
        if @textQualifier <> ''
        begin
            set @sql = replace(@sql, nchar(@delimiterReplacementUTFNumber), @columnDelimiter)
        end

        exec(@sql)
    end

    select * from ##tempCSVSplitToTable

    -- destroy ##tempCSVSplitToTable table
    if object_id('tempdb..##tempCSVSplitToTable') is not null
    begin
        drop table ##tempCSVSplitToTable
    end
end