Files
Sandbox/TranscriptData.ps1
T
Zack Meier 03dba08135 sync
2026-04-15 15:42:41 -05:00

58 lines
1.8 KiB
PowerShell

# Read the transcript export. The loop below consumes it as repeating
# three-line records: timestamp, speaker, spoken words.
# @() guarantees an array: without it, a one-line file comes back as a bare
# string (indexing it yields single characters) and an empty file comes back
# as $null (indexing it throws).
$transcript = @(Get-Content D:\Downloads\transcript.txt)
$RawResult = [System.Collections.ArrayList]@()

# Lines left over after the last complete record cannot form a valid entry.
$leftover = $transcript.Count % 3
if ($leftover -ne 0) {
    Write-Warning "Transcript has $leftover trailing line(s) that do not form a complete record; they are ignored."
}

# Advance one whole record at a time; the bound ensures all three lines of the
# record exist, so no entry is ever stored with $null fields.
for ($x = 0; ($x + 2) -lt $transcript.Count; $x += 3) {
    $obj = [pscustomobject]@{
        timestamp = $transcript[$x]
        speaker   = $transcript[$x + 1]
        words     = $transcript[$x + 2]
    }
    $null = $RawResult.Add($obj)
}
# Explode every dialog entry into one record per word, carrying the timestamp
# and speaker along so the words can be grouped globally or per speaker.
$WordsResult = [System.Collections.ArrayList]@()
ForEach ($dialog in $RawResult) {
    # An entry without a words line (e.g. an incomplete trailing record) has
    # nothing to split; calling .split() on $null would throw.
    if ([string]::IsNullOrWhiteSpace($dialog.words)) { continue }

    ForEach ($word in $dialog.words.split(' ')) {
        # Strip punctuation: keep only ASCII letters, digits and whitespace.
        $word = [regex]::Replace($word, "[^a-zA-Z0-9\s]", "")

        # Consecutive spaces and punctuation-only tokens (such as "-") end up
        # empty here; skip them so they are not counted as a word.
        if ([string]::IsNullOrWhiteSpace($word)) { continue }

        $obj = [pscustomobject]@{
            timestamp = $dialog.timestamp
            speaker   = $dialog.speaker
            word      = $word
        }
        $null = $WordsResult.Add($obj)
    }
}
# Emit the 300 most frequent words across all speakers, most frequent first.
$WordsResult |
    Group-Object -Property Word |
    Sort-Object -Property Count -Descending |
    Select-Object -First 300
# Top words by speaker: one row per speaker with the columns word1..word10
# holding that speaker's most frequent words, most frequent first.
$WordsBySpeakerResult = [System.Collections.ArrayList]@()
$DialogBySpeaker = $WordsResult | Group-Object speaker
ForEach ($speaker in $DialogBySpeaker) {
    # @() keeps the result an array even when the speaker used only one
    # distinct word, so .Count and indexing behave uniformly.
    $Top10SpeakerWords = @($speaker.Group | Group-Object Word | Sort-Object -Descending Count | Select-Object -First 10)

    # An ordered dictionary preserves the column order speaker, word1..word10.
    $row = [ordered]@{ speaker = $speaker.Name }
    for ($i = 0; $i -lt 10; $i++) {
        # Speakers with fewer than ten distinct words get $null in the
        # remaining columns instead of reading past the end of the array
        # (which throws under Set-StrictMode).
        $row["word$($i + 1)"] = if ($i -lt $Top10SpeakerWords.Count) { $Top10SpeakerWords[$i].Name } else { $null }
    }

    $obj = [pscustomobject]$row
    $null = $WordsBySpeakerResult.Add($obj)
}
# Load the noun list: the "word" column of a semicolon-delimited CSV file.
$TopNouns = (Import-Csv "D:\Downloads\english-word-list-nouns.csv" -Delimiter ';').word
# Start from the full per-word list.
# NOTE(review): no filtering against $TopNouns is visible in this part of the file - confirm where it is applied.
$FilteredWordsResult = $WordsResult