2015-04-17 71 views
1

我是C编程新手,找到了下面这个程序。它读入一段文本并统计每个单词出现的频率。我遇到的问题是:当两个或更多单词出现的次数相同时,这些单词需要按字母顺序排序,但我不知道该如何实现。(标题:在C中按字母顺序排列字符串)

下面是代码:

#include <stdio.h> 
#include <string.h> 
#include <ctype.h> 
#include <stdlib.h> 

#define MAXWORDS 10000 
#define MAXSTRING 100 

/* Structure holding word frequency information: each entry pairs the
 * text of a word with the number of times that word has been counted.
 */ 

typedef struct _word { 
    char s[MAXSTRING]; /* the word, stored as a C string in MAXSTRING bytes */ 
    int count;  /* number of times word occurs */ 
} word; 

/* Record one occurrence of the word s in the table words[0 .. *n-1].
 *
 * words: table of entries; assumed to have room for MAXWORDS elements
 * n:     in/out number of entries currently in use
 * s:     NUL-terminated word to record
 *
 * If the word is already present its count is incremented; otherwise a
 * new entry with count 1 is appended and *n grows by one.  A word longer
 * than MAXSTRING - 1 characters is truncated to fit an entry, and a new
 * word is silently dropped once MAXWORDS entries are in use, so neither
 * case can write past the end of a buffer.
 */
void insert_word(word *words, int *n, char *s) {
    char key[MAXSTRING];
    int i;

    /* Truncate before searching so lookup and storage agree on the same
     * text; otherwise an over-long word would never match its stored copy
     * and would be inserted again on every occurrence. */
    snprintf(key, sizeof key, "%s", s);

    for (i = 0; i < *n; i++) {
        if (strcmp(key, words[i].s) == 0) {
            /* found it: increment and return */
            words[i].count++;
            return;
        }
    }

    /* table full: dropping the word is safer than overrunning the array */
    if (*n >= MAXWORDS) {
        return;
    }

    /* key fits in MAXSTRING bytes, so this copy cannot overflow */
    strcpy(words[*n].s, key);

    /* this word has occurred once up to now, so count = 1 */
    words[*n].count = 1;

    /* one more word */
    (*n)++;
}

/* Ordering callback for quicksort: arranges words by descending count,
 * so the most frequent come first.  Entries with the same count compare
 * as equal (0); no tie-break is applied between them.
 */
int wordcmp(word *a, word *b) {
    int lhs = a->count;
    int rhs = b->count;

    /* +1 when a is rarer, -1 when a is more frequent, 0 on a tie */
    return (lhs < rhs) - (lhs > rhs);
}

/* Classify c: yields 1 for a letter in a..z or A..Z, 0 for anything else. */
int is_alpha(char c) {
    int is_lower = ('a' <= c && c <= 'z');
    int is_upper = ('A' <= c && c <= 'Z');

    return is_lower || is_upper;
}

/* Delete the character at index i of s by sliding everything after it
 * (terminator included) one position to the left.  If index i already
 * holds the terminator, the string is left as it is.
 */
void remove_char (char *s, int i) {
    char *p;

    for (p = s + i; *p; p++) {
        /* pull the next character (eventually the terminator) down */
        p[0] = p[1];
    }
    *p = 0;
}

/* Remove every non-alphabetic character from the string s, in place.
 *
 * s: NUL-terminated string to filter; on return it holds only the
 *    characters accepted by is_alpha, in their original order.
 */
void remove_non_alpha(char *s) {
    int src;
    int dst = 0;

    /* Compact the kept characters toward the front in a single pass.
     * Deleting at index i and then advancing i would skip the character
     * that slides into position i, leaving the second of two adjacent
     * non-letters behind (e.g. "a12b" would become "a2b"). */
    for (src = 0; s[src]; src++) {
        if (is_alpha(s[src])) {
            s[dst++] = s[src];
        }
    }
    s[dst] = 0;
}

/* Convert every letter in the string s to lowercase, in place.
 *
 * s: NUL-terminated string to convert; characters that tolower leaves
 *    alone are kept unchanged.
 */
void make_lowercase(char *s) {
    int i;

    for (i = 0; s[i]; i++) {
        /* tolower requires a value representable as unsigned char (or
         * EOF); a plain char may be negative, so convert first */
        s[i] = (char)tolower((unsigned char)s[i]);
    }
}

/* main program: reads an integer a followed by whitespace-separated
 * tokens from standard input, counts how often each word occurs, and
 * prints up to a words with their counts in the order produced by
 * sorting with wordcmp.
 */ 
int main() { 
    word words[MAXWORDS]; 
    char s[1000]; 
    int i, n, m; 

    n = 0; 
    int a; 
    /* a = maximum number of words to print; scanf's result is not checked */ 
    scanf("%d",&a); 

    /* read all the words in the file... */ 
    /* NOTE(review): feof() only reports end-of-file after a read has
    * already failed, and the scanf result below is ignored, so the final
    * pass through this loop works on whatever s still holds.  "%s" has no
    * field width either, so a token of 1000 or more characters overruns s.
    */ 

    while (!feof(stdin)) { 
     scanf("%s", s); 

     /* only tokens that start with a letter are counted */ 
     if (is_alpha(s[0])) { 
      remove_non_alpha(s); 
      make_lowercase(s); 
      insert_word(words, &n, s); 
     } 
    } 

    /* sort the n collected entries; the cast adapts wordcmp's word *
    * parameters to the comparator type qsort expects 
    */ 
    qsort((void *)words, n, sizeof(word), 
      (int (*)(const void *, const void *))wordcmp); 

    /* if fewer than a words in total, just print up to the 
    * first n words: m = min(n, a) 
    */ 
    if (n < a) 
     m = n; 
    else 
     m = a; 

    /* print the words with their frequencies */ 
    for (i = 0; i < m; i++) 
     printf("%s %d\n", words[i].s, words[i].count); 
} 
+0

当“两个或更多的单词出现相同的次数”时,你的输出是什么?想想看,这是什么意思? –

回答

2

你应该改进比较函数:如果出现次数相等,就返回两个字符串本身的比较结果:

/* Ordering callback for quicksort: most frequent words first (descending
 * count).  Words whose counts are equal are ordered by strcmp on their
 * text, i.e. in ascending lexicographical order.
 */
int wordcmp (word *a, word *b) {
    if (a->count != b->count) {
        /* the rarer word sorts later */
        return (a->count < b->count) ? +1 : -1;
    }

    /* same frequency: fall back to comparing the words themselves */
    return strcmp(a->s, b->s);
}

另外请注意,您的解析循环不正确:while (!feof(stdin)) 不会在文件结束时正确停止,最后一个单词会被处理两次。您应该把逻辑改为:

while (scanf("%999s", s) == 1) { 
    ... 
} 

格式"%999s"可以防止过长的单词造成缓冲区溢出。这样的超长单词会被悄悄地拆分,因此统计结果会略有偏差,但不会引发未定义行为(可能导致崩溃)。

+0

注意:'strcmp'通常并不是按字母顺序比较(它比较的是字节值)。不过在这种情况下它*确实*是按字母顺序排列的(输入只包含小写的a..z)。 – jfs

+0

@J.F. Sebastian:没错!'strcmp()'执行的是'C'语言环境下的字典序比较,即逐字节比较。如果提问者想要更精细的排序方法,可以改用'strcoll()',并祈祷区域设置已被正确定义和选择。即使只使用小写字母a-z,西班牙语的正确排序规则也与'strcmp'给出的结果不同:“ll”和“ch”需要特殊处理。 – chqrlie