Splitting text and putting it into dictionary











up vote
2
down vote

favorite












I have text with 600 words and I'm supposed to delete every quotation marks, numbers(years, dates, ..), digits ,... I should only have words, and I have to put in into dictionary.



So I have tried to go through with for each loop and get the first letter and save it in a list. Then I split every row in a word.
e.g.:




You are pretty.



You
are
pretty


The problem there are words in a row they're still same but they shouldn't be same. I've tried to fix it but I couldn't find any solution.



 public Dictionary<string, int> words = new Dictionary<string, int>();
public Dictionary<char, List<string>> firstletter = new Dictionary<char, List<string>>();
public Aufgabe(string filename)
{
string filler = "ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ";
foreach (char f in filler)
{
firstletter[f] = new List<string>();
}

Load(filename);

}
public void Load(string filename)
{
List<string> w = new List<string>();
StreamReader r = new StreamReader(filename);



while (!r.EndOfStream)
{
string row = r.ReadLine();
string parts = row.Split(' ');
string sonderzeichen = new string { "@", ",", ".", ";", "'", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "(", ")", "{",
"}", "!", "?", "/", """, "&", "+", "-", "–" };
string list = new string[parts.Length];
for (int i = 0; i < parts.Length; i++)
{
string a = parts[i];
foreach (string s in sonderzeichen)
{
if (s != "-")
{
a = a.Replace(s, string.Empty);
}
else
{
if (a.Length == 1)
{
a = string.Empty;
}
}
}
list[i] = a;
}
parts = list;

foreach (string a in parts)
{
if (words.ContainsKey(a))
{
words[a] += 1;
}
else
{
words.Add(a, 1);
}

string b = a.ToUpper();
if (b == "")
continue;
List<string> letter = firstletter[b[0]];
if (!letter.Contains(a))
{
letter.Add(a);
}
}
}
}









share|improve this question




















  • 2




    Zen observations such as "they're still same but they shouldn't be same" do not help the people trying to provide an answer. :-/
    – ΩmegaMan
    Nov 7 at 18:34










  • Yea sryy e.g. state-of-the-art it should be state of the art but it didn't change
    – John Erkelens
    Nov 7 at 18:37






  • 1




    Why not just use a regex like Regex.Split(sentence, @"W+");?
    – ggorlen
    Nov 7 at 18:40










  • @ggorlen Didn't know about that I will try
    – John Erkelens
    Nov 7 at 18:42












  • Don't use the stream readers, there so .Net 1, unless you have to. Use File.ReadAllText instead.
    – ΩmegaMan
    Nov 7 at 19:06















up vote
2
down vote

favorite












I have text with 600 words and I'm supposed to delete every quotation marks, numbers(years, dates, ..), digits ,... I should only have words, and I have to put in into dictionary.



So I have tried to go through with for each loop and get the first letter and save it in a list. Then I split every row in a word.
e.g.:




You are pretty.



You
are
pretty


The problem there are words in a row they're still same but they shouldn't be same. I've tried to fix it but I couldn't find any solution.



 public Dictionary<string, int> words = new Dictionary<string, int>();
public Dictionary<char, List<string>> firstletter = new Dictionary<char, List<string>>();
public Aufgabe(string filename)
{
string filler = "ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ";
foreach (char f in filler)
{
firstletter[f] = new List<string>();
}

Load(filename);

}
public void Load(string filename)
{
List<string> w = new List<string>();
StreamReader r = new StreamReader(filename);



while (!r.EndOfStream)
{
string row = r.ReadLine();
string parts = row.Split(' ');
string sonderzeichen = new string { "@", ",", ".", ";", "'", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "(", ")", "{",
"}", "!", "?", "/", """, "&", "+", "-", "–" };
string list = new string[parts.Length];
for (int i = 0; i < parts.Length; i++)
{
string a = parts[i];
foreach (string s in sonderzeichen)
{
if (s != "-")
{
a = a.Replace(s, string.Empty);
}
else
{
if (a.Length == 1)
{
a = string.Empty;
}
}
}
list[i] = a;
}
parts = list;

foreach (string a in parts)
{
if (words.ContainsKey(a))
{
words[a] += 1;
}
else
{
words.Add(a, 1);
}

string b = a.ToUpper();
if (b == "")
continue;
List<string> letter = firstletter[b[0]];
if (!letter.Contains(a))
{
letter.Add(a);
}
}
}
}









share|improve this question




















  • 2




    Zen observations such as "they're still same but they shouldn't be same" do not help the people trying to provide an answer. :-/
    – ΩmegaMan
    Nov 7 at 18:34










  • Yea sryy e.g. state-of-the-art it should be state of the art but it didn't change
    – John Erkelens
    Nov 7 at 18:37






  • 1




    Why not just use a regex like Regex.Split(sentence, @"W+");?
    – ggorlen
    Nov 7 at 18:40










  • @ggorlen Didn't know about that I will try
    – John Erkelens
    Nov 7 at 18:42












  • Don't use the stream readers, there so .Net 1, unless you have to. Use File.ReadAllText instead.
    – ΩmegaMan
    Nov 7 at 19:06













up vote
2
down vote

favorite









up vote
2
down vote

favorite











I have text with 600 words and I'm supposed to delete every quotation marks, numbers(years, dates, ..), digits ,... I should only have words, and I have to put in into dictionary.



So I have tried to go through with for each loop and get the first letter and save it in a list. Then I split every row in a word.
e.g.:




You are pretty.



You
are
pretty


The problem there are words in a row they're still same but they shouldn't be same. I've tried to fix it but I couldn't find any solution.



 public Dictionary<string, int> words = new Dictionary<string, int>();
public Dictionary<char, List<string>> firstletter = new Dictionary<char, List<string>>();
public Aufgabe(string filename)
{
string filler = "ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ";
foreach (char f in filler)
{
firstletter[f] = new List<string>();
}

Load(filename);

}
public void Load(string filename)
{
List<string> w = new List<string>();
StreamReader r = new StreamReader(filename);



while (!r.EndOfStream)
{
string row = r.ReadLine();
string parts = row.Split(' ');
string sonderzeichen = new string { "@", ",", ".", ";", "'", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "(", ")", "{",
"}", "!", "?", "/", """, "&", "+", "-", "–" };
string list = new string[parts.Length];
for (int i = 0; i < parts.Length; i++)
{
string a = parts[i];
foreach (string s in sonderzeichen)
{
if (s != "-")
{
a = a.Replace(s, string.Empty);
}
else
{
if (a.Length == 1)
{
a = string.Empty;
}
}
}
list[i] = a;
}
parts = list;

foreach (string a in parts)
{
if (words.ContainsKey(a))
{
words[a] += 1;
}
else
{
words.Add(a, 1);
}

string b = a.ToUpper();
if (b == "")
continue;
List<string> letter = firstletter[b[0]];
if (!letter.Contains(a))
{
letter.Add(a);
}
}
}
}









share|improve this question















I have text with 600 words and I'm supposed to delete every quotation marks, numbers(years, dates, ..), digits ,... I should only have words, and I have to put in into dictionary.



So I have tried to go through with for each loop and get the first letter and save it in a list. Then I split every row in a word.
e.g.:




You are pretty.



You
are
pretty


The problem there are words in a row they're still same but they shouldn't be same. I've tried to fix it but I couldn't find any solution.



 public Dictionary<string, int> words = new Dictionary<string, int>();
public Dictionary<char, List<string>> firstletter = new Dictionary<char, List<string>>();
public Aufgabe(string filename)
{
string filler = "ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ";
foreach (char f in filler)
{
firstletter[f] = new List<string>();
}

Load(filename);

}
public void Load(string filename)
{
List<string> w = new List<string>();
StreamReader r = new StreamReader(filename);



while (!r.EndOfStream)
{
string row = r.ReadLine();
string parts = row.Split(' ');
string sonderzeichen = new string { "@", ",", ".", ";", "'", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "(", ")", "{",
"}", "!", "?", "/", """, "&", "+", "-", "–" };
string list = new string[parts.Length];
for (int i = 0; i < parts.Length; i++)
{
string a = parts[i];
foreach (string s in sonderzeichen)
{
if (s != "-")
{
a = a.Replace(s, string.Empty);
}
else
{
if (a.Length == 1)
{
a = string.Empty;
}
}
}
list[i] = a;
}
parts = list;

foreach (string a in parts)
{
if (words.ContainsKey(a))
{
words[a] += 1;
}
else
{
words.Add(a, 1);
}

string b = a.ToUpper();
if (b == "")
continue;
List<string> letter = firstletter[b[0]];
if (!letter.Contains(a))
{
letter.Add(a);
}
}
}
}






c#






share|improve this question















share|improve this question













share|improve this question




share|improve this question








edited Nov 7 at 18:31









gunr2171

7,356104466




7,356104466










asked Nov 7 at 18:28









John Erkelens

175




175








  • 2




    Zen observations such as "they're still same but they shouldn't be same" do not help the people trying to provide an answer. :-/
    – ΩmegaMan
    Nov 7 at 18:34










  • Yea sryy e.g. state-of-the-art it should be state of the art but it didn't change
    – John Erkelens
    Nov 7 at 18:37






  • 1




    Why not just use a regex like Regex.Split(sentence, @"W+");?
    – ggorlen
    Nov 7 at 18:40










  • @ggorlen Didn't know about that I will try
    – John Erkelens
    Nov 7 at 18:42












  • Don't use the stream readers, there so .Net 1, unless you have to. Use File.ReadAllText instead.
    – ΩmegaMan
    Nov 7 at 19:06














  • 2




    Zen observations such as "they're still same but they shouldn't be same" do not help the people trying to provide an answer. :-/
    – ΩmegaMan
    Nov 7 at 18:34










  • Yea sryy e.g. state-of-the-art it should be state of the art but it didn't change
    – John Erkelens
    Nov 7 at 18:37






  • 1




    Why not just use a regex like Regex.Split(sentence, @"W+");?
    – ggorlen
    Nov 7 at 18:40










  • @ggorlen Didn't know about that I will try
    – John Erkelens
    Nov 7 at 18:42












  • Don't use the stream readers, there so .Net 1, unless you have to. Use File.ReadAllText instead.
    – ΩmegaMan
    Nov 7 at 19:06








2




2




Zen observations such as "they're still same but they shouldn't be same" do not help the people trying to provide an answer. :-/
– ΩmegaMan
Nov 7 at 18:34




Zen observations such as "they're still same but they shouldn't be same" do not help the people trying to provide an answer. :-/
– ΩmegaMan
Nov 7 at 18:34












Yea sryy e.g. state-of-the-art it should be state of the art but it didn't change
– John Erkelens
Nov 7 at 18:37




Yea sryy e.g. state-of-the-art it should be state of the art but it didn't change
– John Erkelens
Nov 7 at 18:37




1




1




Why not just use a regex like Regex.Split(sentence, @"W+");?
– ggorlen
Nov 7 at 18:40




Why not just use a regex like Regex.Split(sentence, @"W+");?
– ggorlen
Nov 7 at 18:40












@ggorlen Didn't know about that I will try
– John Erkelens
Nov 7 at 18:42






@ggorlen Didn't know about that I will try
– John Erkelens
Nov 7 at 18:42














Don't use the stream readers, there so .Net 1, unless you have to. Use File.ReadAllText instead.
– ΩmegaMan
Nov 7 at 19:06




Don't use the stream readers, there so .Net 1, unless you have to. Use File.ReadAllText instead.
– ΩmegaMan
Nov 7 at 19:06












3 Answers
3






active

oldest

votes

















up vote
1
down vote



accepted










There are some things missing in the other answers:




  • No validation is done to check if the text is a word

  • Comparison should not be case-sensitive (i.e. spain, Spain and SPAIN should be considered the same word)


My solution:



StringComparer comparer = StringComparer.OrdinalIgnoreCase;
string text = "The 'rain' in spain falls mainly on the plain. 07 November 2018 20:02:07 - 20180520 I said the Plain in SPAIN. 12345";

var dictionary = Regex.Split(text, @"W+")
.Where(IsValidWord)
.GroupBy(m => m, comparer)
.ToDictionary(m => m.Key, m => m.Count(), comparer);


Method IsValidWord:



// logic to validate word goes here
private static bool IsValidWord(string text)
{
double value;

bool isNumeric = double.TryParse(text, out value);

// add more validation rules here

return !isNumeric;
}


EDIT



I noticed in your code that you have a Dictionary with the words grouped by first letter. This can be achieved like this (using the previous dictionary):



var lettersDictionary = dictionary.Keys.GroupBy(x => x.Substring(0, 1), 
(alphabet, subList) => new {
Alphabet = alphabet,
SubList = subList.OrderBy(x => x, comparer).ToList()
})
.ToDictionary(m => m.Alphabet, m => m.SubList, comparer);





share|improve this answer






























    up vote
    1
    down vote













    You can just split with a regex, then use LINQ to create your dictionary:



    var dictionary = Regex.Split(text, @"W+")
    .GroupBy(m => m, StringComparer.OrdinalIgnoreCase) // Case-insensitive
    .ToDictionary(m => m.Key, m => m.Count());


    UPDATE



    In applying to your example code, your task class could become something like this to build both dictionaries (and to consider case insensitive):



    public class Aufgabe
    {
    const string ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ";
    public Dictionary<string, int> words;
    public Dictionary<char, List<string>> firstletter;
    public Aufgabe(string filename)
    {
    var text = File.ReadAllText(filename);
    words = Regex.Split(text, @"W+")
    .GroupBy(m => m, StringComparer.OrdinalIgnoreCase)
    .ToDictionary(m => m.Key, m => m.Count());
    firstletter = ALPHABET.ToDictionary(a => a, // First-letter key
    a => words.Keys.Where(m => a == char.ToUpper(m[0])).ToList()); // Words
    }
    }





    share|improve this answer






























      up vote
      0
      down vote













      Here is one way with Regex, note that case sensitivity has not been addressed



      var text = "The 'rain' in spain falls mainly on the plain. I said the plain in spain";

      var result = new Dictionary<string,string>();

      Regex.Matches(text, @"[^s]+")
      .OfType<Match>()
      .Select(m => Regex.Replace(m.Value, @"W", string.Empty))
      .ToList()
      .ForEach(word =>
      {
      if (!result.ContainsKey(word))
      result.Add(word, word);
      });


      result



      enter image description here






      share|improve this answer





















        Your Answer






        StackExchange.ifUsing("editor", function () {
        StackExchange.using("externalEditor", function () {
        StackExchange.using("snippets", function () {
        StackExchange.snippets.init();
        });
        });
        }, "code-snippets");

        StackExchange.ready(function() {
        var channelOptions = {
        tags: "".split(" "),
        id: "1"
        };
        initTagRenderer("".split(" "), "".split(" "), channelOptions);

        StackExchange.using("externalEditor", function() {
        // Have to fire editor after snippets, if snippets enabled
        if (StackExchange.settings.snippets.snippetsEnabled) {
        StackExchange.using("snippets", function() {
        createEditor();
        });
        }
        else {
        createEditor();
        }
        });

        function createEditor() {
        StackExchange.prepareEditor({
        heartbeatType: 'answer',
        convertImagesToLinks: true,
        noModals: true,
        showLowRepImageUploadWarning: true,
        reputationToPostImages: 10,
        bindNavPrevention: true,
        postfix: "",
        imageUploader: {
        brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
        contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
        allowUrls: true
        },
        onDemand: true,
        discardSelector: ".discard-answer"
        ,immediatelyShowMarkdownHelp:true
        });


        }
        });














         

        draft saved


        draft discarded


















        StackExchange.ready(
        function () {
        StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53195612%2fsplitting-text-and-putting-it-into-dictionary%23new-answer', 'question_page');
        }
        );

        Post as a guest















        Required, but never shown

























        3 Answers
        3






        active

        oldest

        votes








        3 Answers
        3






        active

        oldest

        votes









        active

        oldest

        votes






        active

        oldest

        votes








        up vote
        1
        down vote



        accepted










        There are some things missing in the other answers:




        • No validation is done to check if the text is a word

        • Comparison should not be case-sensitive (i.e. spain, Spain and SPAIN should be considered the same word)


        My solution:



        StringComparer comparer = StringComparer.OrdinalIgnoreCase;
        string text = "The 'rain' in spain falls mainly on the plain. 07 November 2018 20:02:07 - 20180520 I said the Plain in SPAIN. 12345";

        var dictionary = Regex.Split(text, @"W+")
        .Where(IsValidWord)
        .GroupBy(m => m, comparer)
        .ToDictionary(m => m.Key, m => m.Count(), comparer);


        Method IsValidWord:



        // logic to validate word goes here
        private static bool IsValidWord(string text)
        {
        double value;

        bool isNumeric = double.TryParse(text, out value);

        // add more validation rules here

        return !isNumeric;
        }


        EDIT



        I noticed in your code that you have a Dictionary with the words grouped by first letter. This can be achieved like this (using the previous dictionary):



        var lettersDictionary = dictionary.Keys.GroupBy(x => x.Substring(0, 1), 
        (alphabet, subList) => new {
        Alphabet = alphabet,
        SubList = subList.OrderBy(x => x, comparer).ToList()
        })
        .ToDictionary(m => m.Alphabet, m => m.SubList, comparer);





        share|improve this answer



























          up vote
          1
          down vote



          accepted










          There are some things missing in the other answers:




          • No validation is done to check if the text is a word

          • Comparison should not be case-sensitive (i.e. spain, Spain and SPAIN should be considered the same word)


          My solution:



          StringComparer comparer = StringComparer.OrdinalIgnoreCase;
          string text = "The 'rain' in spain falls mainly on the plain. 07 November 2018 20:02:07 - 20180520 I said the Plain in SPAIN. 12345";

          var dictionary = Regex.Split(text, @"W+")
          .Where(IsValidWord)
          .GroupBy(m => m, comparer)
          .ToDictionary(m => m.Key, m => m.Count(), comparer);


          Method IsValidWord:



          // logic to validate word goes here
          private static bool IsValidWord(string text)
          {
          double value;

          bool isNumeric = double.TryParse(text, out value);

          // add more validation rules here

          return !isNumeric;
          }


          EDIT



          I noticed in your code that you have a Dictionary with the words grouped by first letter. This can be achieved like this (using the previous dictionary):



          var lettersDictionary = dictionary.Keys.GroupBy(x => x.Substring(0, 1), 
          (alphabet, subList) => new {
          Alphabet = alphabet,
          SubList = subList.OrderBy(x => x, comparer).ToList()
          })
          .ToDictionary(m => m.Alphabet, m => m.SubList, comparer);





          share|improve this answer

























            up vote
            1
            down vote



            accepted







            up vote
            1
            down vote



            accepted






            There are some things missing in the other answers:




            • No validation is done to check if the text is a word

            • Comparison should not be case-sensitive (i.e. spain, Spain and SPAIN should be considered the same word)


            My solution:



            StringComparer comparer = StringComparer.OrdinalIgnoreCase;
            string text = "The 'rain' in spain falls mainly on the plain. 07 November 2018 20:02:07 - 20180520 I said the Plain in SPAIN. 12345";

            var dictionary = Regex.Split(text, @"W+")
            .Where(IsValidWord)
            .GroupBy(m => m, comparer)
            .ToDictionary(m => m.Key, m => m.Count(), comparer);


            Method IsValidWord:



            // logic to validate word goes here
            private static bool IsValidWord(string text)
            {
            double value;

            bool isNumeric = double.TryParse(text, out value);

            // add more validation rules here

            return !isNumeric;
            }


            EDIT



            I noticed in your code that you have a Dictionary with the words grouped by first letter. This can be achieved like this (using the previous dictionary):



            var lettersDictionary = dictionary.Keys.GroupBy(x => x.Substring(0, 1), 
            (alphabet, subList) => new {
            Alphabet = alphabet,
            SubList = subList.OrderBy(x => x, comparer).ToList()
            })
            .ToDictionary(m => m.Alphabet, m => m.SubList, comparer);





            share|improve this answer














            There are some things missing in the other answers:




            • No validation is done to check if the text is a word

            • Comparison should not be case-sensitive (i.e. spain, Spain and SPAIN should be considered the same word)


            My solution:



            StringComparer comparer = StringComparer.OrdinalIgnoreCase;
            string text = "The 'rain' in spain falls mainly on the plain. 07 November 2018 20:02:07 - 20180520 I said the Plain in SPAIN. 12345";

            var dictionary = Regex.Split(text, @"W+")
            .Where(IsValidWord)
            .GroupBy(m => m, comparer)
            .ToDictionary(m => m.Key, m => m.Count(), comparer);


            Method IsValidWord:



            // logic to validate word goes here
            private static bool IsValidWord(string text)
            {
            double value;

            bool isNumeric = double.TryParse(text, out value);

            // add more validation rules here

            return !isNumeric;
            }


            EDIT



            I noticed in your code that you have a Dictionary with the words grouped by first letter. This can be achieved like this (using the previous dictionary):



            var lettersDictionary = dictionary.Keys.GroupBy(x => x.Substring(0, 1), 
            (alphabet, subList) => new {
            Alphabet = alphabet,
            SubList = subList.OrderBy(x => x, comparer).ToList()
            })
            .ToDictionary(m => m.Alphabet, m => m.SubList, comparer);






            share|improve this answer














            share|improve this answer



            share|improve this answer








            edited Nov 7 at 19:50

























            answered Nov 7 at 19:08









            Rui Jarimba

            6,95662958




            6,95662958
























                up vote
                1
                down vote













                You can just split with a regex, then use LINQ to create your dictionary:



                var dictionary = Regex.Split(text, @"W+")
                .GroupBy(m => m, StringComparer.OrdinalIgnoreCase) // Case-insensitive
                .ToDictionary(m => m.Key, m => m.Count());


                UPDATE



                In applying to your example code, your task class could become something like this to build both dictionaries (and to consider case insensitive):



                public class Aufgabe
                {
                const string ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ";
                public Dictionary<string, int> words;
                public Dictionary<char, List<string>> firstletter;
                public Aufgabe(string filename)
                {
                var text = File.ReadAllText(filename);
                words = Regex.Split(text, @"W+")
                .GroupBy(m => m, StringComparer.OrdinalIgnoreCase)
                .ToDictionary(m => m.Key, m => m.Count());
                firstletter = ALPHABET.ToDictionary(a => a, // First-letter key
                a => words.Keys.Where(m => a == char.ToUpper(m[0])).ToList()); // Words
                }
                }





                share|improve this answer



























                  up vote
                  1
                  down vote













                  You can just split with a regex, then use LINQ to create your dictionary:



                  var dictionary = Regex.Split(text, @"W+")
                  .GroupBy(m => m, StringComparer.OrdinalIgnoreCase) // Case-insensitive
                  .ToDictionary(m => m.Key, m => m.Count());


                  UPDATE



                  In applying to your example code, your task class could become something like this to build both dictionaries (and to consider case insensitive):



                  public class Aufgabe
                  {
                  const string ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ";
                  public Dictionary<string, int> words;
                  public Dictionary<char, List<string>> firstletter;
                  public Aufgabe(string filename)
                  {
                  var text = File.ReadAllText(filename);
                  words = Regex.Split(text, @"W+")
                  .GroupBy(m => m, StringComparer.OrdinalIgnoreCase)
                  .ToDictionary(m => m.Key, m => m.Count());
                  firstletter = ALPHABET.ToDictionary(a => a, // First-letter key
                  a => words.Keys.Where(m => a == char.ToUpper(m[0])).ToList()); // Words
                  }
                  }





                  share|improve this answer

























                    up vote
                    1
                    down vote










                    up vote
                    1
                    down vote









                    You can just split with a regex, then use LINQ to create your dictionary:



                    var dictionary = Regex.Split(text, @"W+")
                    .GroupBy(m => m, StringComparer.OrdinalIgnoreCase) // Case-insensitive
                    .ToDictionary(m => m.Key, m => m.Count());


                    UPDATE



                    In applying to your example code, your task class could become something like this to build both dictionaries (and to consider case insensitive):



                    public class Aufgabe
                    {
                    const string ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ";
                    public Dictionary<string, int> words;
                    public Dictionary<char, List<string>> firstletter;
                    public Aufgabe(string filename)
                    {
                    var text = File.ReadAllText(filename);
                    words = Regex.Split(text, @"W+")
                    .GroupBy(m => m, StringComparer.OrdinalIgnoreCase)
                    .ToDictionary(m => m.Key, m => m.Count());
                    firstletter = ALPHABET.ToDictionary(a => a, // First-letter key
                    a => words.Keys.Where(m => a == char.ToUpper(m[0])).ToList()); // Words
                    }
                    }





                    share|improve this answer














                    You can just split with a regex, then use LINQ to create your dictionary:



                    var dictionary = Regex.Split(text, @"W+")
                    .GroupBy(m => m, StringComparer.OrdinalIgnoreCase) // Case-insensitive
                    .ToDictionary(m => m.Key, m => m.Count());


                    UPDATE



                    In applying to your example code, your task class could become something like this to build both dictionaries (and to consider case insensitive):



                    public class Aufgabe
                    {
                    const string ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ";
                    public Dictionary<string, int> words;
                    public Dictionary<char, List<string>> firstletter;
                    public Aufgabe(string filename)
                    {
                    var text = File.ReadAllText(filename);
                    words = Regex.Split(text, @"W+")
                    .GroupBy(m => m, StringComparer.OrdinalIgnoreCase)
                    .ToDictionary(m => m.Key, m => m.Count());
                    firstletter = ALPHABET.ToDictionary(a => a, // First-letter key
                    a => words.Keys.Where(m => a == char.ToUpper(m[0])).ToList()); // Words
                    }
                    }






                    share|improve this answer














                    share|improve this answer



                    share|improve this answer








                    edited Nov 7 at 19:49

























                    answered Nov 7 at 18:45









                    Jason W

                    10.1k21342




                    10.1k21342






















                        up vote
                        0
                        down vote













                        Here is one way with Regex, note that case sensitivity has not been addressed



                        var text = "The 'rain' in spain falls mainly on the plain. I said the plain in spain";

                        var result = new Dictionary<string,string>();

                        Regex.Matches(text, @"[^s]+")
                        .OfType<Match>()
                        .Select(m => Regex.Replace(m.Value, @"W", string.Empty))
                        .ToList()
                        .ForEach(word =>
                        {
                        if (!result.ContainsKey(word))
                        result.Add(word, word);
                        });


                        result



                        enter image description here






                        share|improve this answer

























                          up vote
                          0
                          down vote













                          Here is one way with Regex, note that case sensitivity has not been addressed



                          var text = "The 'rain' in spain falls mainly on the plain. I said the plain in spain";

                          var result = new Dictionary<string,string>();

                          Regex.Matches(text, @"[^s]+")
                          .OfType<Match>()
                          .Select(m => Regex.Replace(m.Value, @"W", string.Empty))
                          .ToList()
                          .ForEach(word =>
                          {
                          if (!result.ContainsKey(word))
                          result.Add(word, word);
                          });


                          result



                          enter image description here






                          share|improve this answer























                            up vote
                            0
                            down vote










                            up vote
                            0
                            down vote









                            Here is one way with Regex, note that case sensitivity has not been addressed



                            var text = "The 'rain' in spain falls mainly on the plain. I said the plain in spain";

                            var result = new Dictionary<string,string>();

                            Regex.Matches(text, @"[^s]+")
                            .OfType<Match>()
                            .Select(m => Regex.Replace(m.Value, @"W", string.Empty))
                            .ToList()
                            .ForEach(word =>
                            {
                            if (!result.ContainsKey(word))
                            result.Add(word, word);
                            });


                            result



                            enter image description here






                            share|improve this answer












                            Here is one way with Regex, note that case sensitivity has not been addressed



                            var text = "The 'rain' in spain falls mainly on the plain. I said the plain in spain";

                            var result = new Dictionary<string,string>();

                            Regex.Matches(text, @"[^s]+")
                            .OfType<Match>()
                            .Select(m => Regex.Replace(m.Value, @"W", string.Empty))
                            .ToList()
                            .ForEach(word =>
                            {
                            if (!result.ContainsKey(word))
                            result.Add(word, word);
                            });


                            result



                            enter image description here







                            share|improve this answer












                            share|improve this answer



                            share|improve this answer










                            answered Nov 7 at 18:45









                            ΩmegaMan

                            15.7k44160




                            15.7k44160






























                                 

                                draft saved


                                draft discarded



















































                                 


                                draft saved


                                draft discarded














                                StackExchange.ready(
                                function () {
                                StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53195612%2fsplitting-text-and-putting-it-into-dictionary%23new-answer', 'question_page');
                                }
                                );

                                Post as a guest















                                Required, but never shown





















































                                Required, but never shown














                                Required, but never shown












                                Required, but never shown







                                Required, but never shown

































                                Required, but never shown














                                Required, but never shown












                                Required, but never shown







                                Required, but never shown







                                這個網誌中的熱門文章

                                Hercules Kyvelos

                                Tangent Lines Diagram Along Smooth Curve

                                Yusuf al-Mu'taman ibn Hud