# Word-frequency analysis of a transcript file.
#
# Input : D:\Downloads\transcript.txt, read as repeating groups of three lines
#         (timestamp, speaker, spoken words).
# Output: the 300 most frequent words overall are written to the output stream.
# Variables left in scope for later steps:
#   $RawResult            - ArrayList, one object per dialog entry (timestamp, speaker, words)
#   $WordsResult          - ArrayList, one object per word (timestamp, speaker, word)
#   $DialogBySpeaker      - $WordsResult grouped by speaker
#   $WordsBySpeakerResult - ArrayList, one object per speaker (speaker, word1..word10)
#   $TopNouns             - the 'word' column of the nouns CSV
#   $FilteredWordsResult  - starts out as $WordsResult

# Groups transcript lines into (timestamp, speaker, words) records, three lines per record.
function ConvertTo-DialogRecord {
    param([string[]]$Lines)

    if ($null -eq $Lines) { return }

    $lineCount = $Lines.Count
    $leftover = $lineCount % 3
    if ($leftover -ne 0) {
        # An incomplete trailing group would produce a record with null fields,
        # so it is skipped instead of being emitted.
        Write-Warning "Transcript has $leftover trailing line(s) that do not form a complete record; ignoring them."
    }

    for ($i = 0; ($i + 2) -lt $lineCount; $i += 3) {
        [pscustomobject]@{
            timestamp = $Lines[$i]
            speaker   = $Lines[$i + 1]
            words     = $Lines[$i + 2]
        }
    }
}

# Splits each record's text on spaces, strips every character that is not a
# letter, digit or whitespace, and emits one object per remaining non-empty word.
function Split-DialogWord {
    param([object[]]$Dialogs)

    foreach ($dialog in $Dialogs) {
        # A record without text contributes no words.
        if ([string]::IsNullOrEmpty($dialog.words)) { continue }

        foreach ($token in $dialog.words.Split(' ')) {
            $clean = [regex]::Replace($token, "[^a-zA-Z0-9\s]", "")

            # Punctuation-only tokens and consecutive spaces collapse to nothing;
            # counting them would make the empty string a "top word".
            if ([string]::IsNullOrWhiteSpace($clean)) { continue }

            [pscustomobject]@{
                timestamp = $dialog.timestamp
                speaker   = $dialog.speaker
                word      = $clean
            }
        }
    }
}

# Builds one summary row for a speaker group (as produced by Group-Object):
# the speaker name plus word1..word<Top>, most frequent first. Slots beyond the
# number of distinct words the speaker used are $null.
function Get-SpeakerTopWords {
    param(
        $SpeakerGroup,
        [int]$Top = 10
    )

    # @() keeps the result indexable even when only one distinct word exists.
    $topWords = @(
        $SpeakerGroup.Group |
            Group-Object word |
            Sort-Object -Descending Count |
            Select-Object -First $Top
    )

    $row = [ordered]@{ speaker = $SpeakerGroup.Name }
    for ($rank = 0; $rank -lt $Top; $rank++) {
        $row["word$($rank + 1)"] = if ($rank -lt $topWords.Count) { $topWords[$rank].Name } else { $null }
    }

    [pscustomobject]$row
}

# @() guarantees an array even when the file holds a single line; a bare string
# would otherwise be indexed character by character.
$transcript = @(Get-Content D:\Downloads\transcript.txt)

$RawResult = [System.Collections.ArrayList]@()
foreach ($record in @(ConvertTo-DialogRecord -Lines $transcript)) {
    $null = $RawResult.Add($record)
}

$WordsResult = [System.Collections.ArrayList]@()
foreach ($entry in @(Split-DialogWord -Dialogs $RawResult)) {
    $null = $WordsResult.Add($entry)
}

# Top 300 Words
$WordsResult | Group-Object Word | Sort-Object -Descending Count | Select-Object -First 300

# Top words by speaker
$WordsBySpeakerResult = [System.Collections.ArrayList]@()
$DialogBySpeaker = $WordsResult | Group-Object speaker
foreach ($speaker in $DialogBySpeaker) {
    $null = $WordsBySpeakerResult.Add((Get-SpeakerTopWords -SpeakerGroup $speaker))
}

$TopNouns = (Import-Csv "D:\Downloads\english-word-list-nouns.csv" -Delimiter ';').word
$FilteredWordsResult = $WordsResult