"voice_segments": [
{
"start_time_seconds": 0.0,
"end_time_seconds": 3.112,
"character_start_index": 0,
"character_end_index": 37,
"dialogue_input_index": 0
},
{
"start_time_seconds": 3.112,
"end_time_seconds": 8.227,
"character_start_index": 37,
"character_end_index": 88,
"dialogue_input_index": 1
},
{
"start_time_seconds": 8.227,
"end_time_seconds": 12.059000000000001,
"character_start_index": 88,
"character_end_index": 127,
"dialogue_input_index": 2
},
{
"start_time_seconds": 12.059000000000001,
"end_time_seconds": 21.881,
"character_start_index": 127,
"character_end_index": 252,
"dialogue_input_index": 3
},
{
"start_time_seconds": 21.881,
"end_time_seconds": 36.422999999999995,
"character_start_index": 252,
"character_end_index": 421,
"dialogue_input_index": 4
},
{
"start_time_seconds": 36.422999999999995,
"end_time_seconds": 47.361999999999995,
"character_start_index": 421,
"character_end_index": 532,
"dialogue_input_index": 5
},
{
"start_time_seconds": 47.361999999999995,
"end_time_seconds": 56.071,
"character_start_index": 532,
"character_end_index": 639,
"dialogue_input_index": 6
},
{
"start_time_seconds": 56.071,
"end_time_seconds": 60.778999999999996,
"character_start_index": 639,
"character_end_index": 700,
"dialogue_input_index": 7
},
{
"start_time_seconds": 60.778999999999996,
"end_time_seconds": 62.324999999999996,
"character_start_index": 700,
"character_end_index": 712,
"dialogue_input_index": 8
},
{
"start_time_seconds": 62.324999999999996,
"end_time_seconds": 65.40899999999999,
"character_start_index": 712,
"character_end_index": 775,
"dialogue_input_index": 9
},
{
"start_time_seconds": 65.40899999999999,
"end_time_seconds": 68.11399999999999,
"character_start_index": 775,
"character_end_index": 825,
"dialogue_input_index": 10
},
{
"start_time_seconds": 68.11399999999999,
"end_time_seconds": 70.37799999999999,
"character_start_index": 825,
"character_end_index": 847,
"dialogue_input_index": 11
},
{
"start_time_seconds": 70.37799999999999,
"end_time_seconds": 76.13799999999999,
"character_start_index": 847,
"character_end_index": 930,
"dialogue_input_index": 12
},
{
"start_time_seconds": 76.13799999999999,
"end_time_seconds": 76.13799999999999,
"character_start_index": 930,
"character_end_index": 940,
"dialogue_input_index": 13
},
{
"start_time_seconds": 76.13799999999999,
"end_time_seconds": 76.13799999999999,
"character_start_index": 940,
"character_end_index": 987,
"dialogue_input_index": 14
},
{
"start_time_seconds": 76.13799999999999,
"end_time_seconds": 76.13799999999999,
"character_start_index": 987,
"character_end_index": 1042,
"dialogue_input_index": 15
},
{
"start_time_seconds": 76.13799999999999,
"end_time_seconds": 76.41799999999999,
"character_start_index": 1042,
"character_end_index": 1079,
"dialogue_input_index": 16
},
{
"start_time_seconds": 76.41799999999999,
"end_time_seconds": 76.41799999999999,
"character_start_index": 1079,
"character_end_index": 1114,
"dialogue_input_index": 17
},
{
"start_time_seconds": 76.41799999999999,
"end_time_seconds": 76.41799999999999,
"character_start_index": 1114,
"character_end_index": 1161,
"dialogue_input_index": 18
},
{
"start_time_seconds": 76.41799999999999,
"end_time_seconds": 76.41799999999999,
"character_start_index": 1161,
"character_end_index": 1171,
"dialogue_input_index": 19
},
{
"start_time_seconds": 76.41799999999999,
"end_time_seconds": 81.13099999999999,
"character_start_index": 1171,
"character_end_index": 1254,
"dialogue_input_index": 20
},
{
"start_time_seconds": 81.13099999999999,
"end_time_seconds": 81.13099999999999,
"character_start_index": 1254,
"character_end_index": 1314,
"dialogue_input_index": 21
},
{
"start_time_seconds": 81.13099999999999,
"end_time_seconds": 82.77999999999999,
"character_start_index": 1314,
"character_end_index": 1410,
"dialogue_input_index": 22
},
{
"start_time_seconds": 82.77999999999999,
"end_time_seconds": 82.77999999999999,
"character_start_index": 1410,
"character_end_index": 1474,
"dialogue_input_index": 23
},
{
"start_time_seconds": 82.77999999999999,
"end_time_seconds": 83.60199999999999,
"character_start_index": 1474,
"character_end_index": 1570,
"dialogue_input_index": 24
},
{
"start_time_seconds": 83.60199999999999,
"end_time_seconds": 83.60199999999999,
"character_start_index": 1570,
"character_end_index": 1690,
"dialogue_input_index": 25
},
{
"start_time_seconds": 83.60199999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 1690,
"character_end_index": 1711,
"dialogue_input_index": 26
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 1711,
"character_end_index": 1732,
"dialogue_input_index": 27
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 1732,
"character_end_index": 1751,
"dialogue_input_index": 28
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 1751,
"character_end_index": 1761,
"dialogue_input_index": 29
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 1761,
"character_end_index": 1827,
"dialogue_input_index": 30
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 1827,
"character_end_index": 1952,
"dialogue_input_index": 31
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 1952,
"character_end_index": 2100,
"dialogue_input_index": 32
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 2100,
"character_end_index": 2123,
"dialogue_input_index": 33
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 2123,
"character_end_index": 2133,
"dialogue_input_index": 34
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 2133,
"character_end_index": 2164,
"dialogue_input_index": 35
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 2164,
"character_end_index": 2181,
"dialogue_input_index": 36
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 2181,
"character_end_index": 2383,
"dialogue_input_index": 37
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 2383,
"character_end_index": 2386,
"dialogue_input_index": 38
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 2386,
"character_end_index": 2399,
"dialogue_input_index": 39
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 2399,
"character_end_index": 2428,
"dialogue_input_index": 40
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 85.39099999999999,
"character_start_index": 2428,
"character_end_index": 2437,
"dialogue_input_index": 41
},
{
"start_time_seconds": 85.39099999999999,
"end_time_seconds": 86.07799999999999,
"character_start_index": 2437,
"character_end_index": 2552,
"dialogue_input_index": 42
},
{
"start_time_seconds": 86.07799999999999,
"end_time_seconds": 86.07799999999999,
"character_start_index": 2552,
"character_end_index": 2555,
"dialogue_input_index": 43
},
{
"start_time_seconds": 86.07799999999999,
"end_time_seconds": 86.07799999999999,
"character_start_index": 2555,
"character_end_index": 2616,
"dialogue_input_index": 44
},
{
"start_time_seconds": 86.07799999999999,
"end_time_seconds": 86.07799999999999,
"character_start_index": 2616,
"character_end_index": 2645,
"dialogue_input_index": 45
},
{
"start_time_seconds": 86.07799999999999,
"end_time_seconds": 88.23599999999999,
"character_start_index": 2645,
"character_end_index": 2660,
"dialogue_input_index": 46
},
{
"start_time_seconds": 88.23599999999999,
"end_time_seconds": 98.05199999999998,
"character_start_index": 2660,
"character_end_index": 2674,
"dialogue_input_index": 47
}
]
Description
Steps to Reproduce
Expected Behavior
Accurate timings to be returned
Observed Behavior
Many duplicate timings are returned, even though `character_start_index` and `character_end_index` increase while `start_time_seconds` remains exactly the same. (See additional context)
Code example
Additional context
Duplicate Timings Example Response
Please note that `character_start_index` and `character_end_index` increase while `start_time_seconds` remains exactly the same:
Details
Related Issue
"return word level alignment when running text_to_speech.convert_with_timestamps" #556